You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/12/02 15:03:54 UTC
svn commit: r1547023 - in /stanbol/trunk/enhancement-engines/dereference:
core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/
core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/
entityhub/src/main/java/org/apache/stanbol...
Author: rwesten
Date: Mon Dec 2 14:03:53 2013
New Revision: 1547023
URL: http://svn.apache.org/r1547023
Log:
definition/implementation of STANBOL-1228, implementation for the Entityhub Dereference Engine (STANBOL-1223); This also includes several improvements and bug fixes for STANBOL-1223
Added:
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceContext.java
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineConfig.java
Removed:
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceUtils.java
Modified:
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceConstants.java
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java
stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java
stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/EntityhubDereferenceEngine.java
stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/TrackingDereferencerBase.java
stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties
Modified: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceConstants.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceConstants.java?rev=1547023&r1=1547022&r2=1547023&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceConstants.java (original)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceConstants.java Mon Dec 2 14:03:53 2013
@@ -16,12 +16,52 @@
*/
package org.apache.stanbol.enhancer.engines.dereference;
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.PlainLiteral;
+import org.apache.clerezza.rdf.core.Triple;
+
/**
* Define configuration parameters for Dereference engines
* @author Rupert Westenthaler
*
*/
public interface DereferenceConstants {
+
+ /**
+ * Property that allows to enable/disable the filtering of {@link Triple}s
+ * with {@link PlainLiteral} {@link Triple#getObject() objects} based on
+ * their {@link Language}. The languages detected for the content are
+ * passed to the {@link EntityDereferencer} via
+ * {@link DereferenceContext#getLanguages()}. If that set is empty, no
+ * literals MUST BE filtered. <p>
+ * If both this and {@link #FILTER_ACCEPT_LANGUAGES} are enabled the filter
+ * should use the union of the two sets available via
+ * {@link DereferenceContext#getLanguages()}.
+ */
+ String FILTER_CONTENT_LANGUAGES = "enhancer.engine.dereference.filterContentlanguages";
+ /**
+ * By default {@link #FILTER_CONTENT_LANGUAGES} is deactivated
+ */
+ boolean DEFAULT_FILTER_CONTENT_LANGUAGES = false;
+
+ /**
+ * Property that allows to enable/disable the filtering of {@link Triple}s
+ * with {@link PlainLiteral} {@link Triple#getObject() objects} based on
+ * their {@link Language}. The languages accepted by the enhancement
+ * request are passed to the {@link EntityDereferencer} via
+ * {@link DereferenceContext#getLanguages()}. If that set is empty, no
+ * literals MUST BE filtered.<p>
+ * If both this and {@link #FILTER_CONTENT_LANGUAGES} are enabled the filter
+ * should use the union of the two sets available via
+ * {@link DereferenceContext#getLanguages()}.
+ */
+ String FILTER_ACCEPT_LANGUAGES = "enhancer.engine.dereference.filterAcceptlanguages";
+
+ /**
+ * By default {@link #FILTER_ACCEPT_LANGUAGES} is activated
+ */
+ boolean DEFAULT_FILTER_ACCEPT_LANGUAGES = true;
+
/**
* Property used to configure the fields that should be dereferenced.<p>
* DereferenceEngines need to support a list of URIs but may also support more
Added: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceContext.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceContext.java?rev=1547023&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceContext.java (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceContext.java Mon Dec 2 14:03:53 2013
@@ -0,0 +1,67 @@
+package org.apache.stanbol.enhancer.engines.dereference;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
+
+/**
+ * Context information passed to an {@link EntityDereferencer} when
+ * dereferencing entities: the {@link OfflineMode} state and the languages
+ * of the literals that should be dereferenced.
+ */
+public class DereferenceContext {
+
+ /**
+ * The {@link OfflineMode} status
+ */
+ protected final boolean offlineMode;
+ /**
+ * Read-only set with languages that need to be dereferenced. Never
+ * <code>null</code>; if empty all languages are included.
+ */
+ private Set<String> languages = Collections.emptySet();
+
+ /**
+ * Create a new DereferenceContext.
+ * @param offlineMode the {@link OfflineMode} state
+ */
+ protected DereferenceContext(boolean offlineMode){
+ this.offlineMode = offlineMode;
+ }
+
+ /**
+ * If the {@link OfflineMode} is active
+ * @return the offline mode status
+ */
+ public boolean isOfflineMode() {
+ return offlineMode;
+ }
+ /**
+ * Setter for the languages of literals that should be dereferenced.
+ * The parsed set is copied, so later changes to it do not affect this
+ * context.
+ * @param languages the languages or <code>null</code> to include all
+ * languages. A <code>null</code> element is allowed.
+ */
+ protected void setLanguages(Set<String> languages) {
+ if(languages == null){
+ this.languages = Collections.emptySet();
+ } else {
+ //defensive copy so that the returned view is really read-only
+ this.languages = Collections.unmodifiableSet(
+ new HashSet<String>(languages));
+ }
+ }
+ /**
+ * Getter for the languages that should be dereferenced. If
+ * empty all languages should be included.
+ * @return the unmodifiable set of languages for literals that should be
+ * dereferenced. Never <code>null</code>.
+ */
+ public Set<String> getLanguages() {
+ return languages;
+ }
+}
Added: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineConfig.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineConfig.java?rev=1547023&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineConfig.java (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineConfig.java Mon Dec 2 14:03:53 2013
@@ -0,0 +1,231 @@
+package org.apache.stanbol.enhancer.engines.dereference;
+
+import static org.apache.stanbol.enhancer.engines.dereference.DereferenceConstants.DEREFERENCE_ENTITIES_FIELDS;
+import static org.apache.stanbol.enhancer.engines.dereference.DereferenceConstants.DEREFERENCE_ENTITIES_LDPATH;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.Hashtable;
+import java.util.List;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.osgi.framework.Constants;
+import org.osgi.service.cm.ConfigurationException;
+
+/**
+ * Typed view on the configuration {@link Dictionary} of a dereference
+ * engine. The engine name and the types of the configured values are
+ * validated on construction.
+ */
+public class DereferenceEngineConfig implements DereferenceConstants {
+
+
+ private final Dictionary<String,Object> config;
+
+ /**
+ * Creates a DereferenceEngine configuration based on a Dictionary. Typically
+ * the dictionary will contain keys as defined by {@link DereferenceConstants}
+ * and {@link EnhancementEngine}
+ * @param config the config - typically as parsed in the activate method of
+ * an OSGI component.
+ * @throws IllegalArgumentException if the parsed config is <code>null</code>
+ * @throws ConfigurationException if the engine name is missing or blank or
+ * if a configured value can not be used
+ */
+ public DereferenceEngineConfig(Dictionary<String,Object> config) throws ConfigurationException {
+ if(config == null){
+ throw new IllegalArgumentException("The parsed configuration MUST NOT be NULL!");
+ }
+ this.config = config;
+ validateRequired(config);
+ }
+ /**
+ * Constructor that parses the config (ATM only used by unit tests)
+ * @param name the name of the engine
+ * @param filterContentLang the {@link #FILTER_CONTENT_LANGUAGES} state
+ * @param filterAcceptLang the {@link #FILTER_ACCEPT_LANGUAGES} state
+ * @throws ConfigurationException if the parsed name is blank
+ */
+ protected DereferenceEngineConfig(String name, boolean filterContentLang, boolean filterAcceptLang)
+ throws ConfigurationException {
+ config = new Hashtable<String,Object>();
+ config.put(EnhancementEngine.PROPERTY_NAME, name);
+ config.put(FILTER_CONTENT_LANGUAGES, filterContentLang);
+ config.put(FILTER_ACCEPT_LANGUAGES, filterAcceptLang);
+ validateRequired(config);
+ }
+
+ /**
+ * If filtering for non content language literals is active
+ * @return the {@link DereferenceConstants#FILTER_CONTENT_LANGUAGES} state
+ */
+ public boolean isFilterContentLanguages(){
+ Object value = config.get(FILTER_CONTENT_LANGUAGES);
+ return value == null ? DEFAULT_FILTER_CONTENT_LANGUAGES :
+ Boolean.parseBoolean(value.toString());
+ }
+
+ /**
+ * If filtering for non accept language literals is active
+ * @return the {@link DereferenceConstants#FILTER_ACCEPT_LANGUAGES} state
+ */
+ public boolean isFilterAcceptLanguages(){
+ Object value = config.get(FILTER_ACCEPT_LANGUAGES);
+ return value == null ? DEFAULT_FILTER_ACCEPT_LANGUAGES :
+ Boolean.parseBoolean(value.toString());
+ }
+
+ /**
+ * Validates the parsed configuration: the engine name is required; the
+ * dereference fields, the LDPath program and the service ranking are
+ * optional but need to be of a supported type if present.
+ * @param config the configuration to validate
+ * @throws ConfigurationException if the validation fails
+ */
+ private void validateRequired(Dictionary<String,Object> config) throws ConfigurationException {
+ Object value = config.get(EnhancementEngine.PROPERTY_NAME);
+ if(value == null || StringUtils.isBlank(value.toString())){
+ throw new ConfigurationException(EnhancementEngine.PROPERTY_NAME,
+ "The EnhancementEngine name MUST NOT be NULL nor empty!");
+ }
+ value = config.get(DEREFERENCE_ENTITIES_FIELDS);
+ if(value != null && !(value instanceof String || value instanceof String[] ||
+ value instanceof Collection<?>)){
+ throw new ConfigurationException(DEREFERENCE_ENTITIES_FIELDS,
+ "Dereference Entities Fields MUST BE parsed as String[], Collection<String> or "
+ + "String (single value). The actual value '"+value+"'(type: '"+value.getClass()
+ + "') is NOT supported");
+ }
+ value = config.get(DEREFERENCE_ENTITIES_LDPATH);
+ if(value != null && !(value instanceof String || value instanceof String[] ||
+ value instanceof Collection<?>)){
+ throw new ConfigurationException(DEREFERENCE_ENTITIES_LDPATH,
+ "Dereference LDPath statements MUST BE parsed as String, String[] or "
+ + "Collection<String>. The actual value '"+value+"'(type: '"+value.getClass()
+ + "') is NOT supported");
+ }
+ //fail early on a service ranking #getServiceRanking() can not convert
+ value = config.get(Constants.SERVICE_RANKING);
+ if(value != null && !(value instanceof Number)){
+ try {
+ Integer.parseInt(value.toString());
+ } catch (NumberFormatException e) {
+ throw new ConfigurationException(Constants.SERVICE_RANKING,
+ "Parsed service ranking '" + value + "' (type: '" + value.getClass().getName()
+ + "') can not be converted to an integer value!", e);
+ }
+ }
+ }
+
+ /**
+ * Getter for the name of the EnhancementEngine
+ * @return the configured {@link EnhancementEngine#PROPERTY_NAME}
+ */
+ public String getEngineName(){
+ Object value = config.get(EnhancementEngine.PROPERTY_NAME);
+ return value == null ? null : value.toString();
+ }
+ /**
+ * The Integer service ranking for the engine
+ * @return the configured {@link Constants#SERVICE_RANKING} or
+ * <code>null</code> if none is configured
+ */
+ public Integer getServiceRanking(){
+ Object value = config.get(Constants.SERVICE_RANKING);
+ //NOTE: explicit branches instead of a nested conditional expression that
+ //mixes int and Integer operands, as such an expression may get typed
+ //as int and then fail with a NullPointerException for a missing value
+ if(value == null){
+ return null;
+ } else if(value instanceof Integer){
+ return (Integer) value;
+ } else if(value instanceof Number){
+ return ((Number)value).intValue();
+ } else {
+ return Integer.parseInt(value.toString());
+ }
+ }
+ /**
+ * Parses the {@link DereferenceConstants#DEREFERENCE_ENTITIES_FIELDS}
+ * config from the parsed Dictionary regardless if it is defined as
+ * <code>String[]</code>, <code>Collection&lt;String&gt;</code> or
+ * <code>String</code> (single value).<p>
+ * This returns the fields as parsed by the configuration.<p>
+ * <b>NOTE:</b> This does not check/convert <code>{prefix}:{localname}</code>
+ * configurations to URIs. The receiver of the list is responsible for
+ * that
+ * @return the {@link List} with the unprocessed dereference fields as list
+ */
+ public List<String> getDereferenceFields(){
+ Object value = config.get(DEREFERENCE_ENTITIES_FIELDS);
+ final List<String> fields;
+ if(value instanceof String[]){
+ fields = Arrays.asList((String[])value);
+ } else if(value instanceof Collection<?>){
+ fields = new ArrayList<String>(((Collection<?>)value).size());
+ for(Object field : (Collection<?>)value){
+ if(field == null){
+ fields.add(null);
+ } else {
+ fields.add(field.toString());
+ }
+ }
+ } else if(value instanceof String){
+ fields = Collections.singletonList((String)value);
+ } else { //value == null or of unsupported type
+ fields = Collections.emptyList();
+ }
+ return fields;
+ }
+ /**
+ * Parses the LdPath program from the value of the
+ * {@link DereferenceConstants#DEREFERENCE_ENTITIES_LDPATH} property. <p>
+ * This supports <code>String</code> (the program as a single String),
+ * <code>String[]</code> and <code>Collection&lt;String&gt;</code> (one
+ * statement per line).<p>
+ * <b>NOTE:</b> This does not parse the LDPath program as this can only be
+ * done by the LdPath repository used by the dereferencer.
+ * @return the unparsed LDPath program as String or <code>null</code> if
+ * no (non blank) program is configured
+ */
+ public String getLdPathProgram(){
+ Object value = config.get(DEREFERENCE_ENTITIES_LDPATH);
+ if(value == null){
+ return null;
+ } else if(value instanceof String){
+ return StringUtils.isBlank((String) value) ? null : (String) value;
+ }
+ final Collection<?> lines;
+ if(value instanceof Collection<?>){
+ lines = (Collection<?>)value;
+ } else if(value instanceof String[]){
+ lines = Arrays.asList((String[])value);
+ } else { //unsupported type - ignore
+ lines = Collections.<Object>emptyList();
+ }
+ //join all non blank lines using '\n' as separator
+ StringBuilder sb = new StringBuilder();
+ for(Object line : lines){
+ if(line != null && !StringUtils.isBlank(line.toString())){
+ if(sb.length() > 0){
+ sb.append('\n');
+ }
+ sb.append(line.toString());
+ }
+ }
+ //an empty builder means that no non blank line was found -> return null!
+ return sb.length() > 0 ? sb.toString() : null;
+ }
+ /**
+ * The dictionary holding the config
+ * @return the dictionary holding the config
+ */
+ public Dictionary<String,Object> getDict(){
+ return config;
+ }
+
+}
Modified: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java?rev=1547023&r1=1547022&r2=1547023&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java (original)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java Mon Dec 2 14:03:53 2013
@@ -16,6 +16,7 @@
*/
package org.apache.stanbol.enhancer.engines.dereference;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
import java.util.ArrayList;
@@ -32,6 +33,7 @@ import java.util.concurrent.ExecutorServ
import java.util.concurrent.Future;
import java.util.concurrent.locks.Lock;
+import org.apache.clerezza.rdf.core.Language;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.Resource;
@@ -43,6 +45,8 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -62,8 +66,14 @@ public class EntityDereferenceEngine imp
protected final EntityDereferencer dereferencer;
+ protected final DereferenceEngineConfig config;
+
protected final String name;
+ protected final boolean filterContentLanguages;
+
+ protected final boolean filterAcceptLanguages;
+
/**
* The Map holding the {@link #serviceProperties} for this engine.
*/
@@ -74,11 +84,14 @@ public class EntityDereferenceEngine imp
*/
private final Map<String,Object> unmodServiceProperties = Collections.unmodifiableMap(serviceProperties);
- public EntityDereferenceEngine(String name, EntityDereferencer dereferencer){
- if(StringUtils.isBlank(name)){
- throw new IllegalArgumentException("The parsed EnhancementEngine name MUST NOT be NULL nor empty!");
- }
- this.name = name;
+ public EntityDereferenceEngine(EntityDereferencer dereferencer, DereferenceEngineConfig config){
+ if(config == null){
+ throw new IllegalArgumentException("The parsed DereferenceEngineConfig MUST NOT be NULL!");
+ }
+ this.config = config;
+ this.name = config.getEngineName();
+ this.filterContentLanguages = config.isFilterContentLanguages();
+ this.filterAcceptLanguages = config.isFilterAcceptLanguages();
if(dereferencer == null){
throw new IllegalArgumentException("The parsed EntityDereferencer MUST NOT be NULL!");
}
@@ -114,6 +127,14 @@ public class EntityDereferenceEngine imp
public Integer getEngineOrdering(){
return (Integer)serviceProperties.get(ENHANCEMENT_ENGINE_ORDERING);
}
+
+ /**
+ * Getter for the config of this engine
+ * @return the Dereference Engine Configuration
+ */
+ public DereferenceEngineConfig getConfig() {
+ return config;
+ }
@Override
public Map<String,Object> getServiceProperties() {
@@ -136,11 +157,21 @@ public class EntityDereferenceEngine imp
return;
}
log.debug("> dereference Entities for ContentItem {}", ci.getUri());
+ final DereferenceContext derefContext = new DereferenceContext(offline);
+ Set<String> includedLangs = new HashSet<String>();
+ //TODO: parse accept languages as soon as Enhancement properties are implemented
final MGraph metadata = ci.getMetadata();
Set<UriRef> referencedEntities = new HashSet<UriRef>();
//(1) read all Entities we need to dereference from the parsed contentItem
ci.getLock().readLock().lock();
try {
+ //parse the languages detected for the content
+ if(filterContentLanguages){
+ for(NonLiteral langAnno : EnhancementEngineHelper.getLanguageAnnotations(metadata)){
+ includedLangs.add(EnhancementEngineHelper.getString(metadata, langAnno, DC_LANGUAGE));
+ }
+ } //no content language filtering - leave contentLanguages empty
+ //parse the referenced entities from the graph
Iterator<Triple> entityReferences = metadata.filter(null, ENHANCER_ENTITY_REFERENCE, null);
while(entityReferences.hasNext()){
Triple triple = entityReferences.next();
@@ -162,18 +193,27 @@ public class EntityDereferenceEngine imp
} finally {
ci.getLock().readLock().unlock();
}
+ if(!includedLangs.isEmpty()){
+ includedLangs.add(null); //also include literals without language
+ //and set the list to the dereference context
+ derefContext.setLanguages(includedLangs);
+ } //else no filterLanguages set ... nothing to do
+
final Lock writeLock = ci.getLock().writeLock();
- log.trace(" - scheduled {} Entities for dereferencing", referencedEntities.size());
+ log.trace(" - scheduled {} Entities for dereferencing",
+ referencedEntities.size());
//(2) dereference the Entities
ExecutorService executor = dereferencer.getExecutor();
long start = System.currentTimeMillis();
Set<UriRef> failedEntities = new HashSet<UriRef>();
int dereferencedCount = 0;
- List<DereferenceJob> dereferenceJobs = new ArrayList<DereferenceJob>(referencedEntities.size());
+ List<DereferenceJob> dereferenceJobs = new ArrayList<DereferenceJob>(
+ referencedEntities.size());
if(executor != null && !executor.isShutdown()){ //dereference using executor
//schedule all entities to dereference
for(final UriRef entity : referencedEntities){
- DereferenceJob dereferenceJob = new DereferenceJob(entity, metadata, writeLock);
+ DereferenceJob dereferenceJob = new DereferenceJob(entity,
+ metadata, writeLock, derefContext);
dereferenceJob.setFuture(executor.submit(dereferenceJob));
dereferenceJobs.add(dereferenceJob);
}
@@ -195,7 +235,8 @@ public class EntityDereferenceEngine imp
+ dereferenceJob.entity + "!", e);
} else { //unknown error
throw new EngineException(this,ci, "Unchecked Error while "
- + "dereferencing Entity " + dereferenceJob.entity +"!", e);
+ + "dereferencing Entity " + dereferenceJob.entity
+ + "!", e);
}
}
}
@@ -203,7 +244,7 @@ public class EntityDereferenceEngine imp
for(UriRef entity : referencedEntities){
try {
log.trace(" ... dereference {}", entity);
- if(dereferencer.dereference(entity, metadata, offline, writeLock)){
+ if(dereferencer.dereference(entity, metadata, writeLock, derefContext)){
dereferencedCount++;
log.trace(" + success");
} else {
@@ -245,19 +286,22 @@ public class EntityDereferenceEngine imp
final UriRef entity;
final MGraph metadata;
final Lock writeLock;
+ final DereferenceContext derefContext;
private Future<Boolean> future;
- DereferenceJob(UriRef entity, MGraph metadata, Lock writeLock){
+ DereferenceJob(UriRef entity, MGraph metadata, Lock writeLock,
+ DereferenceContext derefContext){
this.entity = entity;
this.metadata = metadata;
this.writeLock = writeLock;
+ this.derefContext = derefContext;
}
@Override
public Boolean call() throws DereferenceException {
log.trace(" ... dereference {}", entity);
- boolean state = dereferencer.dereference(entity, metadata, offline, writeLock);
+ boolean state = dereferencer.dereference(entity, metadata, writeLock, derefContext);
if(state){
log.trace(" + success");
} else {
Modified: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java?rev=1547023&r1=1547022&r2=1547023&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java (original)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java Mon Dec 2 14:03:53 2013
@@ -47,7 +47,7 @@ public interface EntityDereferencer {
/**
* EntityDereferencer can optionally provide an ExecutorService used to
- * dereference Entities.
+ * dereference Entities.
* @return the {@link ExecutorService} or <code>null</code> if not used
* by this implementation
*/
@@ -58,8 +58,6 @@ public interface EntityDereferencer {
* data to the parsed graph
* @param graph the graph to add the dereferenced entity
* @param entity the uri of the Entity to dereference
- * @param offlineMode <code>true</code> if {@link OfflineMode} is active.
- * Otherwise <code>false</code>
* @param writeLock The writeLock for the graph. Dereferences MUST require
* a <code>{@link Lock#lock() writeLock#lock()}</code> before adding
* dereferenced data to the parsed graph. This is essential for using multiple
@@ -67,11 +65,14 @@ public interface EntityDereferencer {
* {@link ConcurrentModificationException}s in this implementations or
* other components (typically other {@link EnhancementEngine}s) accessing the
* same graph.
+ * @param dereferenceContext Context information for the {@link EntityDereferencer}
+ * such as the {@link OfflineMode} state, possible languages of the content and
+ * requested languages in the Enhancement request.
* @return if the entity was dereferenced
* @throws DereferenceException on any error while dereferencing the
* requested Entity
*/
- boolean dereference(UriRef entity, MGraph graph, boolean offlineMode,
- Lock writeLock) throws DereferenceException;
+ boolean dereference(UriRef entity, MGraph graph, Lock writeLock,
+ DereferenceContext dereferenceContext) throws DereferenceException;
}
Modified: stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java?rev=1547023&r1=1547022&r2=1547023&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java (original)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java Mon Dec 2 14:03:53 2013
@@ -140,7 +140,8 @@ public class DereferenceEngineTest {
return false;
}
};
- EntityDereferenceEngine engine = new EntityDereferenceEngine("online", onlineDereferencer);
+ EntityDereferenceEngine engine = new EntityDereferenceEngine(onlineDereferencer,
+ new DereferenceEngineConfig("online", false, false));
//engine in online mode
Assert.assertNotEquals(engine.canEnhance(ci), EnhancementEngine.CANNOT_ENHANCE);
//set engine in offline mode
@@ -151,7 +152,8 @@ public class DereferenceEngineTest {
@Test
public void testSyncDereferencing() throws Exception {
ContentItem ci = getContentItem("urn:test:testSyncDereferencing");
- EntityDereferenceEngine engine = new EntityDereferenceEngine("sync", syncDereferencer);
+ EntityDereferenceEngine engine = new EntityDereferenceEngine(syncDereferencer,
+ new DereferenceEngineConfig("sync", false, false));
Assert.assertNotEquals(engine.canEnhance(ci), EnhancementEngine.CANNOT_ENHANCE);
engine.computeEnhancements(ci);
validateDereferencedEntities(ci.getMetadata());
@@ -160,7 +162,8 @@ public class DereferenceEngineTest {
@Test
public void testAsyncDereferencing() throws Exception {
ContentItem ci = getContentItem("urn:test:testSyncDereferencing");
- EntityDereferenceEngine engine = new EntityDereferenceEngine("sync", asyncDereferencer);
+ EntityDereferenceEngine engine = new EntityDereferenceEngine(asyncDereferencer,
+ new DereferenceEngineConfig("async", false, false));
Assert.assertNotEquals(engine.canEnhance(ci), EnhancementEngine.CANNOT_ENHANCE);
engine.computeEnhancements(ci);
validateDereferencedEntities(ci.getMetadata());
@@ -201,7 +204,7 @@ public class DereferenceEngineTest {
}
@Override
- public boolean dereference(UriRef entity, MGraph graph, boolean offlineMode, Lock writeLock) throws DereferenceException {
+ public boolean dereference(UriRef entity, MGraph graph, Lock writeLock, DereferenceContext context) throws DereferenceException {
Iterator<Triple> entityTriples = testData.filter(entity, null, null);
if(entityTriples.hasNext()){
writeLock.lock();
Modified: stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/EntityhubDereferenceEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/EntityhubDereferenceEngine.java?rev=1547023&r1=1547022&r2=1547023&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/EntityhubDereferenceEngine.java (original)
+++ stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/EntityhubDereferenceEngine.java Mon Dec 2 14:03:53 2013
@@ -22,7 +22,6 @@ import static org.apache.stanbol.enhance
import static org.osgi.framework.Constants.SERVICE_RANKING;
import java.util.Dictionary;
-import java.util.Hashtable;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
@@ -32,13 +31,13 @@ import org.apache.felix.scr.annotations.
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.ReferenceCardinality;
import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
-import org.apache.stanbol.enhancer.engines.dereference.DereferenceUtils;
+import org.apache.stanbol.enhancer.engines.dereference.DereferenceConstants;
+import org.apache.stanbol.enhancer.engines.dereference.DereferenceEngineConfig;
import org.apache.stanbol.enhancer.engines.dereference.EntityDereferenceEngine;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.entityhub.servicesapi.Entityhub;
import org.osgi.framework.BundleContext;
-import org.osgi.framework.Constants;
import org.osgi.framework.ServiceReference;
import org.osgi.framework.ServiceRegistration;
import org.osgi.service.cm.ConfigurationException;
@@ -64,6 +63,8 @@ import org.slf4j.LoggerFactory;
@org.apache.felix.scr.annotations.Properties(value={
@Property(name=PROPERTY_NAME),
@Property(name=EntityhubDereferenceEngine.SITE_ID),
+ @Property(name=DereferenceConstants.FILTER_CONTENT_LANGUAGES,
+ boolValue=DereferenceConstants.DEFAULT_FILTER_CONTENT_LANGUAGES),
@Property(name=DEREFERENCE_ENTITIES_FIELDS,cardinality=Integer.MAX_VALUE,
value={"rdfs:comment","geo:lat","geo:long","foaf:depiction","dbp-ont:thumbnail"}),
@Property(name=DEREFERENCE_ENTITIES_LDPATH, cardinality=Integer.MAX_VALUE),
@@ -120,35 +121,10 @@ public class EntityhubDereferenceEngine
bundleContext = ctx.getBundleContext();
log.info("> activate {}",getClass().getSimpleName());
//get the metadata later set to the enhancement engine
- String engineName;
- engineMetadata = new Hashtable<String,Object>();
- Object value = properties.get(PROPERTY_NAME);
- if(value == null || value.toString().isEmpty()){
- throw new ConfigurationException(PROPERTY_NAME, "The EnhancementEngine name MUST BE configured!");
- } else {
- engineName = value.toString().trim();
- }
- log.debug(" - engineName: {}",engineName);
- engineMetadata.put(PROPERTY_NAME, engineName);
- value = properties.get(SERVICE_RANKING);
- Integer serviceRanking = null;
- if(value instanceof Number){
- serviceRanking = ((Number)value).intValue();
- } else if(value != null){
- try {
- serviceRanking = Integer.parseInt(value.toString());
- } catch(NumberFormatException e){
- throw new ConfigurationException(SERVICE_RANKING, "Parsed service ranking '"
- + value + "' (type: " + value.getClass().getName()
- + "' can not be converted to an integer value!", e);
- }
- } //else not defined
- if(serviceRanking != null){
- log.debug(" - service.ranking: {}", serviceRanking);
- engineMetadata.put(Constants.SERVICE_RANKING, serviceRanking);
- }
+ DereferenceEngineConfig engineConfig = new DereferenceEngineConfig(properties);
+ log.debug(" - engineName: {}", engineConfig.getEngineName());
//parse the Entityhub Site used for dereferencing
- value = properties.get(SITE_ID);
+ Object value = properties.get(SITE_ID);
//init the EntitySource
if (value == null) {
siteName = "*"; //all referenced sites
@@ -174,12 +150,9 @@ public class EntityhubDereferenceEngine
//set the namespace prefix service to the dereferencer
entityDereferencer.setNsPrefixService(prefixService);
//now parse dereference field config
- entityDereferencer.setDereferencedFields(
- DereferenceUtils.parseDereferencedFieldsConfig(properties));
- //create the engine
- entityDereferencer.setLdPath(
- DereferenceUtils.parseLdPathConfig(properties));
- entityDereferenceEngine = new EntityDereferenceEngine(engineName, entityDereferencer);
+ entityDereferencer.setDereferencedFields(engineConfig.getDereferenceFields());
+ entityDereferencer.setLdPath(engineConfig.getLdPathProgram());
+ entityDereferenceEngine = new EntityDereferenceEngine(entityDereferencer, engineConfig);
//NOTE: registration of this instance as OSGI service is done as soon as the
// entityhub service backing the entityDereferencer is available.
@@ -217,7 +190,7 @@ public class EntityhubDereferenceEngine
new String[]{EnhancementEngine.class.getName(),
ServiceProperties.class.getName()},
entityDereferenceEngine,
- engineMetadata);
+ entityDereferenceEngine.getConfig().getDict());
}
trackedServiceCount++;
Modified: stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/TrackingDereferencerBase.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/TrackingDereferencerBase.java?rev=1547023&r1=1547022&r2=1547023&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/TrackingDereferencerBase.java (original)
+++ stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/TrackingDereferencerBase.java Mon Dec 2 14:03:53 2013
@@ -20,6 +20,8 @@ import java.io.StringReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;
@@ -28,12 +30,14 @@ import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.locks.Lock;
+import org.apache.clerezza.rdf.core.Language;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
import org.apache.commons.lang.StringUtils;
import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
import org.apache.stanbol.enhancer.engines.dereference.DereferenceConstants;
+import org.apache.stanbol.enhancer.engines.dereference.DereferenceContext;
import org.apache.stanbol.enhancer.engines.dereference.DereferenceException;
import org.apache.stanbol.enhancer.engines.dereference.EntityDereferencer;
import org.apache.stanbol.entityhub.core.mapping.DefaultFieldMapperImpl;
@@ -48,9 +52,11 @@ import org.apache.stanbol.entityhub.serv
import org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapper;
import org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapping;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.Text;
import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
+import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
import org.osgi.framework.BundleContext;
import org.osgi.framework.Filter;
import org.osgi.framework.InvalidSyntaxException;
@@ -252,7 +258,7 @@ public abstract class TrackingDereferenc
}
@Override
- public final boolean dereference(UriRef uri, MGraph graph, boolean offlineMode, Lock writeLock) throws DereferenceException {
+ public final boolean dereference(UriRef uri, MGraph graph, Lock writeLock, DereferenceContext derefContext) throws DereferenceException {
T service = getService();
if(service == null){
throw new DereferenceException(uri, serviceClass.getClass().getSimpleName()
@@ -260,19 +266,24 @@ public abstract class TrackingDereferenc
}
Representation rep;
try {
- rep = getRepresentation(service, uri.getUnicodeString(), offlineMode);
+ rep = getRepresentation(service, uri.getUnicodeString(), derefContext.isOfflineMode());
} catch(EntityhubException e){
throw new DereferenceException(uri,e);
}
+ //we need the languages as strings
+ final Set<String> langs = derefContext.getLanguages();
+
if(rep != null){
- if(fieldMapper == null && ldpathProgram == null){
+ if(fieldMapper == null && ldpathProgram == null && langs.isEmpty()){
copyAll(uri, rep, graph, writeLock);
- } else {
- if(fieldMapper != null){
- copyMapped(uri, rep, graph, writeLock);
+ } else { //we need to apply some filters while dereferencing
+ if(fieldMapper != null || !langs.isEmpty()){
+ //this considers specified fields and included languages
+ copyMapped(uri, rep, langs, graph, writeLock);
}
if(ldpathProgram != null){
- copyLdPath(uri, getRdfBackend(service), graph, writeLock);
+ //this executes LDPath statements
+ copyLdPath(uri, getRdfBackend(service), langs, graph, writeLock);
}
}
return true;
@@ -285,13 +296,14 @@ public abstract class TrackingDereferenc
* writes the the results to the parsed Graph
* @param uri the context
* @param rdfBackend the RdfBackend the LDPath program is executed on
+ * @param langs the set of languages to dereference
* @param graph the graph to store the results
* @param writeLock the write lock for the graph
* @throws DereferenceException on any {@link EntityhubException} while
* executing the LDPath program
*/
- protected void copyLdPath(UriRef uri, RDFBackend<Object> rdfBackend,
- MGraph graph, Lock writeLock) throws DereferenceException {
+ private void copyLdPath(UriRef uri, RDFBackend<Object> rdfBackend,
+ Set<String> langs, MGraph graph, Lock writeLock) throws DereferenceException {
//A RdfReference needs to be used as context
RdfReference context = valueFactory.createReference(uri);
//create the representation that stores results in an intermediate
@@ -303,13 +315,23 @@ public abstract class TrackingDereferenc
for(at.newmedialab.ldpath.model.fields.FieldMapping<?,Object> mapping : ldpathProgram.getFields()) {
Collection<?> values = mapping.getValues(rdfBackend, context);
if(values != null && !values.isEmpty()){
- result.add(mapping.getFieldName(),values);
+ String fieldName = mapping.getFieldName();
+ if(langs.isEmpty()){
+ result.add(fieldName,values);
+ } else { //filter for languages
+ for(Object value : values){
+ if((!(value instanceof Text)) ||
+ langs.contains(((Text)value).getLanguage())){
+ result.add(fieldName, value);
+ } //else text with filtered language ... do not add
+ }
+ }
}
}
} catch (EntityhubException e){
throw new DereferenceException(uri, e);
}
- if(!ldPathResults.isEmpty()){ //copy the resutls
+ if(!ldPathResults.isEmpty()){ //copy the results
writeLock.lock();
try {
graph.addAll(ldPathResults);
@@ -340,10 +362,27 @@ public abstract class TrackingDereferenc
* in the graph
* @param uri the uri of the entity to dereference
* @param rep the data for the entity as in the entityhub
+ * @param langs the set of languages to dereference
* @param graph the graph to store the mapping results
* @param writeLock the write lock for the graph
*/
- private void copyMapped(UriRef uri, Representation rep, MGraph graph, Lock writeLock) {
+ private void copyMapped(UriRef uri, Representation rep, Set<String> langs,
+ MGraph graph, Lock writeLock) {
+ //init the fieldMapper
+ FieldMapper fieldMapper;
+ if(!langs.isEmpty()){ //if we need to filter for specific languages
+ //we need to modify the fieldMapper and add a global filter for the
+ //languages. NOTE that the fieldMapper might be null. In that case we
+ //just need to filter literals by language
+ //TODO: maybe cache fieldMappers for sets of languages
+ fieldMapper = this.fieldMapper != null ? this.fieldMapper.clone() :
+ new DefaultFieldMapperImpl(ValueConverterFactory.getDefaultInstance());
+ fieldMapper.addMapping(new FieldMapping(new TextConstraint(
+ (String)null, langs.toArray(new String[graph.size()]))));
+ } else { //just use the fieldMapper as parsed in the config
+ fieldMapper = this.fieldMapper;
+ }
+ //execute the field mappings
writeLock.lock();
try {
RdfRepresentation clerezzaRep = valueFactory.createRdfRepresentation(uri, graph);
Modified: stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1547023&r1=1547022&r2=1547023&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties Mon Dec 2 14:03:53 2013
@@ -22,6 +22,12 @@ service.ranking.description=If two enhan
one with the higher ranking will be used to process parsed content items.
+org.apache.stanbol.enhancer.engines.dereference.entityhub.EntityhubDereferenceEngine.name=Apache \
+Stanbol Enhancer Engine: Entityhub Dereference
+org.apache.stanbol.enhancer.engines.dereference.entityhub.EntityhubDereferenceEngine.description=Enhancement \
+Engine that dereferences Entities by using the Stanbol Entityhub. This engine supports single Sites, \
+the SiteManager as well as the Entityhub itself.
+
#===============================================================================
#Properties and Options used to configure
#===============================================================================
@@ -38,3 +44,7 @@ information to be included for dereferen
dereference rules. As an example 'schema:name = .[rdf:type is foaf:Person]/fn:concat(\
foaf:given," ",foaf:surname)' would construct the schema:name value by concatenation \
of the foaf:given and foaf:surname if the entity is of type foaf:Person.
+
+enhancer.engines.dereference.entityhub.siteId.name=Site
+enhancer.engines.dereference.entityhub.siteId.description=The name of the Entityhub Site, \
+'entityhub' for the Entityhub or '*' to dereference Entities using all active sites.