You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/10/17 09:06:16 UTC

svn commit: r1532986 - in /stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight: annotate/ candidates/ model/ utils/

Author: rwesten
Date: Thu Oct 17 07:06:16 2013
New Revision: 1532986

URL: http://svn.apache.org/r1532986
Log:
fix for STANBOL-1069; also some improvements to the Annotate Service done while looking into STANBOL-848 (e.g. added equals and hashCode for Annotation class)

Modified:
    stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java
    stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java
    stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/Annotation.java
    stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/CandidateResource.java
    stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/SurfaceForm.java
    stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/SpotlightEngineUtils.java

Modified: stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java?rev=1532986&r1=1532985&r2=1532986&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java (original)
+++ stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java Thu Oct 17 07:06:16 2013
@@ -39,6 +39,7 @@ import java.net.URLEncoder;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Dictionary;
+import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.clerezza.rdf.core.Language;
@@ -53,6 +54,7 @@ import org.apache.felix.scr.annotations.
 import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
 import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
 import org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm;
 import org.apache.stanbol.enhancer.engines.dbpspotlight.utils.SpotlightEngineUtils;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -193,12 +195,13 @@ public class DBPSpotlightAnnotateEnhance
 		String text = SpotlightEngineUtils.getPlainContent(ci);
 
 		Collection<Annotation> dbpslGraph = doPostRequest(text,ci.getUri());
+		Map<SurfaceForm,UriRef> surfaceForm2TextAnnotation = new HashMap<SurfaceForm,UriRef>();
 		if (dbpslGraph != null) {
 			// Acquire a write lock on the ContentItem when adding the
 			// enhancements
 			ci.getLock().writeLock().lock();
 			try {
-				createEnhancements(dbpslGraph, ci, text, language);
+				createEnhancements(dbpslGraph, ci, text, language, surfaceForm2TextAnnotation);
 				if (log.isDebugEnabled()) {
 					Serializer serializer = Serializer.getInstance();
 					ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
@@ -231,22 +234,18 @@ public class DBPSpotlightAnnotateEnhance
 	 *            the content item
 	 */
 	protected void createEnhancements(Collection<Annotation> occs,
-			ContentItem ci, String text, Language language) {
-		//we need to create multiple EntityAnnotations even for the same
-		//suggested Entity, as the scores will be different
-		//HashMap<Resource, UriRef> entityAnnotationMap = new HashMap<Resource, UriRef>();
+			ContentItem ci, String text, Language language,
+			Map<SurfaceForm,UriRef> surfaceForm2TextAnnotation) {
 		for (Annotation occ : occs) {
-			UriRef textAnnotation = SpotlightEngineUtils.createTextEnhancement(
-					occ.surfaceForm, this, ci, text, language);
-
-//			if (entityAnnotationMap.containsKey(occ.uri)) {
-//				model.add(new TripleImpl(entityAnnotationMap.get(occ.uri),
-//						DC_RELATION, textAnnotation));
-//			} else {
-			SpotlightEngineUtils.createEntityAnnotation(occ, this, ci, textAnnotation, language);
-//				entityAnnotationMap.put(occ.uri, entityAnnotation);
+			UriRef textAnnotation = surfaceForm2TextAnnotation.get(occ.surfaceForm);
+			if(textAnnotation == null){ //not yet written ... create a new
+    			textAnnotation = SpotlightEngineUtils.createTextEnhancement(
+    					occ.surfaceForm, this, ci, text, language);
+    			surfaceForm2TextAnnotation.put(occ.surfaceForm,textAnnotation);
 			}
+			SpotlightEngineUtils.createEntityAnnotation(occ, this, ci, textAnnotation, language);
 		}
+	}
 
 
 	/**

Modified: stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java?rev=1532986&r1=1532985&r2=1532986&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java (original)
+++ stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java Thu Oct 17 07:06:16 2013
@@ -247,7 +247,7 @@ public class DBPSpotlightCandidatesEnhan
 				CandidateResource resource = resources.next();
 				UriRef entityAnnotation = SpotlightEngineUtils.createEntityAnnotation(
 						resource, this, ci, textAnnotation);
-				entityAnnotationMap.put(resource.uri, entityAnnotation);
+				entityAnnotationMap.put(resource.localName, entityAnnotation);
 			}
 			if (entityAnnotationMap.containsKey(occ.name)) {
 				model.add(new TripleImpl(entityAnnotationMap.get(occ.name),

Modified: stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/Annotation.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/Annotation.java?rev=1532986&r1=1532985&r2=1532986&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/Annotation.java (original)
+++ stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/Annotation.java Thu Oct 17 07:06:16 2013
@@ -144,10 +144,9 @@ public class Annotation {
 			dbpslann.support = (new Integer(node.getAttribute("support")))
 					.intValue();
 			dbpslann.types = node.getAttribute("types");
-			dbpslann.surfaceForm = new SurfaceForm();
-			dbpslann.surfaceForm.name = node.getAttribute("surfaceForm");
-			dbpslann.surfaceForm.offset = (new Integer(node.getAttribute("offset")))
-					.intValue();
+			dbpslann.surfaceForm = new SurfaceForm(
+			    new Integer(node.getAttribute("offset")),
+			    node.getAttribute("surfaceForm"));
 			//set the type of the surface form
 			List<String> dbpediaTypes = dbpslann.getDbpediaTypeNames();
 			if(!dbpediaTypes.isEmpty()){

Modified: stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/CandidateResource.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/CandidateResource.java?rev=1532986&r1=1532985&r2=1532986&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/CandidateResource.java (original)
+++ stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/CandidateResource.java Thu Oct 17 07:06:16 2013
@@ -21,6 +21,7 @@ import static org.apache.stanbol.enhance
 import java.util.Collection;
 import java.util.HashSet;
 
+import org.apache.clerezza.rdf.core.UriRef;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
@@ -36,7 +37,7 @@ import org.w3c.dom.NodeList;
 public class CandidateResource {
 
 	public String label;
-	public String uri;
+	public String localName;
 	public double contextualScore;
 	public double percentageOfSecondRank;
 	public double support;
@@ -47,10 +48,15 @@ public class CandidateResource {
 		return String
 				.format("[label=%s, uri=%s, contextualScore=%d, percentageOfSecondRank=%d, contextualScore=%d, "
 						+ "percentageOfSecondRank=%d, contextualScore=%d]",
-						label, uri, contextualScore, percentageOfSecondRank,
+						label, localName, contextualScore, percentageOfSecondRank,
 						support, priorScore, finalScore);
 	}
 	
+	public UriRef getUri(){
+	    return new UriRef(new StringBuilder("http://dbpedia.org/resource/")
+	    .append(localName).toString());
+	}
+	
 	/**
 	 * This method creates the Collection of surface forms, which the method
 	 * <code>createEnhancement</code> adds to the meta data of the content item
@@ -77,7 +83,7 @@ public class CandidateResource {
 					Element r = (Element) n;
 					CandidateResource resource = new CandidateResource();
 					resource.label = r.getAttribute("label");
-					resource.uri = r.getAttribute("uri");
+					resource.localName = r.getAttribute("uri");
 					resource.contextualScore = (new Double(
 							r.getAttribute("contextualScore"))).doubleValue();
 					resource.percentageOfSecondRank = (new Double(

Modified: stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/SurfaceForm.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/SurfaceForm.java?rev=1532986&r1=1532985&r2=1532986&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/SurfaceForm.java (original)
+++ stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/SurfaceForm.java Thu Oct 17 07:06:16 2013
@@ -37,11 +37,34 @@ import org.w3c.dom.NodeList;
  */
 public class SurfaceForm {
 
-	public String name;
+	public final String name;
 	public String type;
-	public Integer offset;
-	public List<CandidateResource> resources = new ArrayList<CandidateResource>();
+	public final Integer offset;
+	public final List<CandidateResource> resources = new ArrayList<CandidateResource>();
 
+	public SurfaceForm(Integer offset, String name){
+	    if(name == null || offset == null){
+	        throw new IllegalArgumentException("Offset and name MUST NOT be NULL");
+	    }
+	    this.name = name;
+	    this.offset = offset;
+	}
+	
+	@Override
+	public int hashCode() {
+	    return name.hashCode()+offset.hashCode();
+	}
+	@Override
+	public boolean equals(Object o) {
+	    if(o instanceof SurfaceForm && name.equals(((SurfaceForm)o).name) &&
+	            offset.equals(((SurfaceForm)o).offset)){
+	        return (type == null && ((SurfaceForm)o).type == null) ||
+	                (type != null && type.equals(((SurfaceForm)o).type));
+	    } else {
+	        return false;
+	    }
+	}
+	
 	public String toString() {
 		return String.format("[name=%s, offset=%i, type=%s]", name, offset,
 				type);
@@ -69,10 +92,9 @@ public class SurfaceForm {
 	}
 
 	protected static SurfaceForm parseSerfaceForm(Element node) {
-		SurfaceForm dbpslann = new SurfaceForm();
-		dbpslann.name = node.getAttribute("name");
-		dbpslann.offset = (new Integer(node.getAttribute("offset")))
-				.intValue();
+		SurfaceForm dbpslann = new SurfaceForm(
+		    new Integer(node.getAttribute("offset")),
+		    node.getAttribute("name"));
 		dbpslann.type = node.getAttribute("type");
 		return dbpslann;
 	}

Modified: stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/SpotlightEngineUtils.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/SpotlightEngineUtils.java?rev=1532986&r1=1532985&r2=1532986&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/SpotlightEngineUtils.java (original)
+++ stanbol/trunk/enhancement-engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/SpotlightEngineUtils.java Thu Oct 17 07:06:16 2013
@@ -62,6 +62,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
 import org.osgi.service.cm.ConfigurationException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -243,7 +244,7 @@ public class SpotlightEngineUtils {
 		model.add(new TripleImpl(entityAnnotation,
 				ENHANCER_ENTITY_LABEL, label));
 		model.add(new TripleImpl(entityAnnotation,
-				ENHANCER_ENTITY_REFERENCE, new UriRef(resource.uri)));
+				ENHANCER_ENTITY_REFERENCE, resource.getUri()));
 		model.add(new TripleImpl(entityAnnotation, PROPERTY_CONTEXTUAL_SCORE,
 				literalFactory.createTypedLiteral(resource.contextualScore)));
 		model.add(new TripleImpl(entityAnnotation,PROPERTY_PERCENTAGE_OF_SECOND_RANK,