You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/12/02 15:03:54 UTC

svn commit: r1547023 - in /stanbol/trunk/enhancement-engines/dereference: core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/ core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/ entityhub/src/main/java/org/apache/stanbol...

Author: rwesten
Date: Mon Dec  2 14:03:53 2013
New Revision: 1547023

URL: http://svn.apache.org/r1547023
Log:
definition/implementation of STANBOL-1228, implementation for the Entityhub Dereference Engine (STANBOL-1223); This also includes several improvements and bug fixes for STANBOL-1223

Added:
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceContext.java
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineConfig.java
Removed:
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceUtils.java
Modified:
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceConstants.java
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java
    stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java
    stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/EntityhubDereferenceEngine.java
    stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/TrackingDereferencerBase.java
    stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties

Modified: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceConstants.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceConstants.java?rev=1547023&r1=1547022&r2=1547023&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceConstants.java (original)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceConstants.java Mon Dec  2 14:03:53 2013
@@ -16,12 +16,52 @@
  */
 package org.apache.stanbol.enhancer.engines.dereference;
 
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.PlainLiteral;
+import org.apache.clerezza.rdf.core.Triple;
+
 /**
  * Define configuration parameters for Dereference engines
  * @author Rupert Westenthaler
  *
  */
 public interface DereferenceConstants {
+    
+    /**
+     * Property that allows to enable/disable the filtering of {@link Triple}s
+     * with {@link PlainLiteral} {@link Triple#getObject() objects} based on
+     * their {@link Language}. Languages that need to be dereferenced are
+     * passed to the {@link EntityDereferencer} via the
+     * {@link DereferenceContext#getContentLanguages()}. If empty no languages
+     * MUST BE filtered. <p>
+     * If both this and {@link #FILTER_ACCEPT_LANGUAGES} are enabled the filter
+     * should use the union of the two sets available via 
+     * {@link DereferenceContext#getLanguages()}.
+     */
+    String FILTER_CONTENT_LANGUAGES = "enhancer.engine.dereference.filterContentlanguages";
+    /**
+     * By default {@link #FILTER_CONTENT_LANGUAGES} is deactivated
+     */
+    boolean DEFAULT_FILTER_CONTENT_LANGUAGES = false;
+    
+    /**
+     * Property that allows to enable/disable the filtering of {@link Triple}s
+     * with {@link PlainLiteral} {@link Triple#getObject() objects} based on
+     * their {@link Language}. Languages that need to be dereferenced are
+     * passed to the {@link EntityDereferencer} via the
+     * {@link DereferenceContext#getAcceptLanguages()}. If empty no languages
+     * MUST BE filtered.<p>
+     * If both this and {@link #FILTER_CONTENT_LANGUAGES} are enabled the filter
+     * should use the union of the two sets available via 
+     * {@link DereferenceContext#getLanguages()}.
+     */
+    String FILTER_ACCEPT_LANGUAGES = "enhancer.engine.dereference.filterAcceptlanguages";
+    
+    /**
+     * By default {@link #FILTER_ACCEPT_LANGUAGES} is activated
+     */
+    boolean DEFAULT_FILTER_ACCEPT_LANGUAGES = true;
+    
     /**
      * Property used to configure the fields that should be dereferenced.<p>
      * DereferenceEngines need to support a list of URIs but may also support more

Added: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceContext.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceContext.java?rev=1547023&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceContext.java (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceContext.java Mon Dec  2 14:03:53 2013
@@ -0,0 +1,55 @@
+package org.apache.stanbol.enhancer.engines.dereference;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
+
+public class DereferenceContext {
+
+    /**
+     * The {@link OfflineMode} status
+     */
+    protected final boolean offlineMode;
+    /** 
+     * Read-only set with languages that need to be dereferenced.
+     */
+    private Set<String> languages = new HashSet<String>();
+    
+    /**
+     * Create a new DereferenceContext.
+     * @param offlineMode the {@link OfflineMode} state
+     */
+    protected DereferenceContext(boolean offlineMode){
+        this.offlineMode = offlineMode;
+    }
+
+    /**
+     * If the {@link OfflineMode} is active
+     * @return the offline mode status
+     */
+    public boolean isOfflineMode() {
+        return offlineMode;
+    }
+    /**
+     * Setter for the languages of literals that should be dereferenced
+     * @param languages the languages to dereference (<code>null</code> is treated as empty)
+     */
+    protected void setLanguages(Set<String> languages) {
+        if(languages == null){
+            this.languages = Collections.emptySet();
+        } else {
+            this.languages = Collections.unmodifiableSet(languages);
+        }
+    }
+    /**
+     * Getter for the languages that should be dereferenced. If 
+     * empty all languages should be included.
+     * @return the languages for literals that should be dereferenced.
+     */
+    public Set<String> getLanguages() {
+        return languages;
+    }
+}

Added: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineConfig.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineConfig.java?rev=1547023&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineConfig.java (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineConfig.java Mon Dec  2 14:03:53 2013
@@ -0,0 +1,200 @@
+package org.apache.stanbol.enhancer.engines.dereference;
+
+import static org.apache.stanbol.enhancer.engines.dereference.DereferenceConstants.DEREFERENCE_ENTITIES_FIELDS;
+import static org.apache.stanbol.enhancer.engines.dereference.DereferenceConstants.DEREFERENCE_ENTITIES_LDPATH;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.Hashtable;
+import java.util.List;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.osgi.framework.Constants;
+import org.osgi.service.cm.ConfigurationException;
+
+public class DereferenceEngineConfig implements DereferenceConstants {
+
+    
+    private final Dictionary<String,Object> config;
+    
+    /**
+     * Creates a DereferenceEngine configuration based on a Dictionary. Typically
+     * the dictionary will contain keys as defined by {@link DereferenceConstants}
+     * and {@link EnhancementEngine}
+     * @param config the config - typically as parsed in the activate method of
+     * an OSGI component.
+     */
+    public DereferenceEngineConfig(Dictionary<String,Object> config) throws ConfigurationException {
+        this.config = config;
+        validateRequired(config);
+    }
+    /**
+     * Constructor that parses the config (ATM only used by unit tests)
+     * @param name
+     * @param filterContentLang
+     * @param filterAcceptLang
+     * @throws ConfigurationException
+     */
+    protected DereferenceEngineConfig(String name, boolean filterContentLang, boolean filterAcceptLang) 
+            throws ConfigurationException {
+        config = new Hashtable<String,Object>();
+        config.put(EnhancementEngine.PROPERTY_NAME, name);
+        config.put(FILTER_CONTENT_LANGUAGES, filterContentLang);
+        config.put(FILTER_ACCEPT_LANGUAGES, filterAcceptLang);
+        validateRequired(config);
+    }
+    
+    /**
+     * If filtering for non content language literals is active
+     * @return the {@link DereferenceConstants#FILTER_CONTENT_LANGUAGES} state
+     */
+    public boolean isFilterContentLanguages(){
+        Object value = config.get(FILTER_CONTENT_LANGUAGES);
+        return value == null ? DEFAULT_FILTER_CONTENT_LANGUAGES : 
+            Boolean.parseBoolean(value.toString());
+    }
+    
+    /**
+     * if filtering for non accept language literals is active
+     * @return the {@link DereferenceConstants#FILTER_ACCEPT_LANGUAGES} state
+     */
+    public boolean isFilterAcceptLanguages(){
+        Object value = config.get(FILTER_ACCEPT_LANGUAGES);
+        return value == null ? DEFAULT_FILTER_ACCEPT_LANGUAGES : 
+            Boolean.parseBoolean(value.toString());
+    }
+    
+    private void validateRequired(Dictionary<String,Object> config) throws ConfigurationException {
+        Object value = config.get(EnhancementEngine.PROPERTY_NAME);
+        if(value == null || StringUtils.isBlank(value.toString())){
+            throw new ConfigurationException(EnhancementEngine.PROPERTY_NAME, 
+                "The EnhancementEngine name MUST NOT be NULL nor empty!");
+        }
+        value = config.get(DEREFERENCE_ENTITIES_FIELDS);
+        if(value != null && !(value instanceof String || value instanceof String[] ||
+                value instanceof Collection<?>)){
+            throw new ConfigurationException(DEREFERENCE_ENTITIES_FIELDS, 
+                "Dereference Entities Fields MUST BE parsed as String[], Collection<String> or "
+                + "String (single value). The actual value '"+value+"'(type: '"+value.getClass() 
+                + "') is NOT supported");
+        }
+        value = config.get(DEREFERENCE_ENTITIES_LDPATH);
+        if(value != null && !(value instanceof String || value instanceof String[] ||
+                value instanceof Collection<?>)){
+            throw new ConfigurationException(DEREFERENCE_ENTITIES_LDPATH, 
+                "Dereference LDPath statements MUST BE parsed as String, String[] or "
+                + "Collection<String>. The actual value '"+value+"'(type: '"+value.getClass() 
+                + "') is NOT supported");            
+        }        
+    }
+
+    /**
+     * Getter for the name of the EnhancementEngine
+     * @return the configured {@link EnhancementEngine#PROPERTY_NAME}
+     */
+    public String getEngineName(){
+        Object value = config.get(EnhancementEngine.PROPERTY_NAME);
+        return value == null ? null : value.toString();
+    }
+    /**
+     * The Integer service ranking for the engine
+     * @return the configured {@link Constants#SERVICE_RANKING}
+     */
+    public Integer getServiceRanking(){
+        Object value = config.get(Constants.SERVICE_RANKING);
+        return value instanceof Integer ? (Integer) value : 
+            value instanceof Number ? ((Number)value).intValue() :
+                value != null ? Integer.parseInt(value.toString()) : 
+                    null;
+        
+    }
+    /**
+     * Reads the {@link DereferenceConstants#DEREFERENCE_ENTITIES_FIELDS}
+     * config from the Dictionary regardless of whether it is defined as
+     * <code>String[]</code>, <code>Collection&lt;String&gt;</code> or
+     * <code>String</code> (single value).<p>
+     * This returns the fields as parsed by the configuration.<p>
+     * <b>NOTE:</b> This does not check/convert <code>{prefix}:{localname}</code>
+     * configurations to URIs. The receiver of the list is responsible for
+     * that 
+     * @return the {@link List} with the unprocessed dereference fields as list
+     */
+    public List<String> getDereferenceFields(){
+        Object value = config.get(DEREFERENCE_ENTITIES_FIELDS);
+        final List<String> fields;
+        if(value instanceof String[]){
+            fields = Arrays.asList((String[])value);
+        } else if(value instanceof Collection<?>){
+            fields = new ArrayList<String>(((Collection<?>)value).size());
+            for(Object field : (Collection<?>)value){
+                if(field == null){
+                    fields.add(null);
+                } else {
+                    fields.add(field.toString());
+                }
+            }
+        } else if(value instanceof String){
+            fields = Collections.singletonList((String)value);
+        } else { //value == null or of unsupported type 
+            fields = Collections.emptyList();
+        }
+        return fields;
+    }
+    /**
+     * Parses the LdPath program from the value of the 
+     * {@link DereferenceConstants#DEREFERENCE_ENTITIES_LDPATH} property. <p>
+     * This supports <code>String</code> (the program as a single String), 
+     * <code>String[]</code> and <code>Collection&lt;String&gt;</code> (one
+     * statement per line).<p>
+     * <b>NOTE:</b> This does not parse the LDPath program as this can only be
+     * done by the LdPath repository used by the dereferencer.
+     * @return the unparsed LDPath program as String 
+     */
+    public String getLdPathProgram(){
+        Object value = config.get(DEREFERENCE_ENTITIES_LDPATH);
+        if(value == null){
+            return null;
+        } else if(value instanceof String){
+            return StringUtils.isBlank((String) value) ? null : (String) value;
+        }
+        StringBuilder sb = new StringBuilder();
+        boolean first = true;
+        if(value instanceof Collection<?>){
+            for(Object line : (Collection<?>)value){
+                if(line != null && !StringUtils.isBlank(line.toString())){
+                    if(first){
+                        first = false;
+                    } else {
+                        sb.append('\n');
+                    }
+                    sb.append(line.toString());
+                }
+            }
+        } else if(value instanceof String[]){
+            for(String line : (String[])value){
+                if(line != null && !StringUtils.isBlank(line)){
+                    if(first){
+                        first = false;
+                    } else {
+                        sb.append('\n');
+                    }
+                    sb.append(line);
+                }
+            }
+        } //else unsupported type - ignore
+        //if first == true we have not found any non blank line -> return null!
+        return !first ? sb.toString() : null;
+    }
+    /**
+     * The dictionary holding the config
+     * @return the dictionary holding the config
+     */
+    public Dictionary<String,Object> getDict(){
+        return config;
+    }
+    
+}

Modified: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java?rev=1547023&r1=1547022&r2=1547023&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java (original)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java Mon Dec  2 14:03:53 2013
@@ -16,6 +16,7 @@
  */
 package org.apache.stanbol.enhancer.engines.dereference;
 
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
 
 import java.util.ArrayList;
@@ -32,6 +33,7 @@ import java.util.concurrent.ExecutorServ
 import java.util.concurrent.Future;
 import java.util.concurrent.locks.Lock;
 
+import org.apache.clerezza.rdf.core.Language;
 import org.apache.clerezza.rdf.core.MGraph;
 import org.apache.clerezza.rdf.core.NonLiteral;
 import org.apache.clerezza.rdf.core.Resource;
@@ -43,6 +45,8 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -62,8 +66,14 @@ public class EntityDereferenceEngine imp
     
     protected final EntityDereferencer dereferencer;
     
+    protected final DereferenceEngineConfig config;
+    
     protected final String name;
     
+    protected final boolean filterContentLanguages;
+    
+    protected final boolean filterAcceptLanguages;
+    
     /**
      * The Map holding the {@link #serviceProperties} for this engine.
      */
@@ -74,11 +84,14 @@ public class EntityDereferenceEngine imp
      */
     private final Map<String,Object> unmodServiceProperties = Collections.unmodifiableMap(serviceProperties);
     
-    public EntityDereferenceEngine(String name, EntityDereferencer dereferencer){
-        if(StringUtils.isBlank(name)){
-            throw new IllegalArgumentException("The parsed EnhancementEngine name MUST NOT be NULL nor empty!");
-        }
-        this.name = name;
+    public EntityDereferenceEngine(EntityDereferencer dereferencer, DereferenceEngineConfig config){
+        if(config == null){
+            throw new IllegalArgumentException("The parsed DereferenceEngineConfig MUST NOT be NULL!");
+        }
+        this.config = config;
+        this.name = config.getEngineName();
+        this.filterContentLanguages = config.isFilterContentLanguages();
+        this.filterAcceptLanguages = config.isFilterAcceptLanguages();
         if(dereferencer == null){
             throw new IllegalArgumentException("The parsed EntityDereferencer MUST NOT be NULL!");
         }
@@ -114,6 +127,14 @@ public class EntityDereferenceEngine imp
     public Integer getEngineOrdering(){
         return (Integer)serviceProperties.get(ENHANCEMENT_ENGINE_ORDERING);
     }
+
+    /**
+     * Getter for the config of this engine
+     * @return the Dereference Engine Configuration
+     */
+    public DereferenceEngineConfig getConfig() {
+        return config;
+    }
     
     @Override
     public Map<String,Object> getServiceProperties() {
@@ -136,11 +157,21 @@ public class EntityDereferenceEngine imp
             return;
         }
         log.debug("> dereference Entities for ContentItem {}", ci.getUri());
+        final DereferenceContext derefContext = new DereferenceContext(offline);
+        Set<String> includedLangs = new HashSet<String>();
+        //TODO: parse accept languages as soon as Enhancement properties are implemented
         final MGraph metadata = ci.getMetadata();
         Set<UriRef> referencedEntities = new HashSet<UriRef>();
         //(1) read all Entities we need to dereference from the parsed contentItem
         ci.getLock().readLock().lock();
         try {
+            //parse the languages detected for the content
+            if(filterContentLanguages){
+                for(NonLiteral langAnno : EnhancementEngineHelper.getLanguageAnnotations(metadata)){
+                    includedLangs.add(EnhancementEngineHelper.getString(metadata, langAnno, DC_LANGUAGE));
+                }
+            } //no content language filtering - leave contentLanguages empty
+            //parse the referenced entities from the graph
             Iterator<Triple> entityReferences = metadata.filter(null, ENHANCER_ENTITY_REFERENCE, null);
             while(entityReferences.hasNext()){
                 Triple triple = entityReferences.next();
@@ -162,18 +193,27 @@ public class EntityDereferenceEngine imp
         } finally {
             ci.getLock().readLock().unlock();
         }
+        if(!includedLangs.isEmpty()){
+            includedLangs.add(null); //also include literals without language
+            //and set the list to the dereference context
+            derefContext.setLanguages(includedLangs);
+        } //else no filterLanguages set ... nothing to do
+
         final Lock writeLock = ci.getLock().writeLock();
-        log.trace(" - scheduled {} Entities for dereferencing", referencedEntities.size());
+        log.trace(" - scheduled {} Entities for dereferencing", 
+            referencedEntities.size());
         //(2) dereference the Entities
         ExecutorService executor = dereferencer.getExecutor();
         long start = System.currentTimeMillis();
         Set<UriRef> failedEntities = new HashSet<UriRef>();
         int dereferencedCount = 0;
-        List<DereferenceJob> dereferenceJobs = new ArrayList<DereferenceJob>(referencedEntities.size());
+        List<DereferenceJob> dereferenceJobs = new ArrayList<DereferenceJob>(
+                referencedEntities.size());
         if(executor != null && !executor.isShutdown()){ //dereference using executor
             //schedule all entities to dereference
             for(final UriRef entity : referencedEntities){
-                DereferenceJob dereferenceJob = new DereferenceJob(entity, metadata, writeLock);
+                DereferenceJob dereferenceJob = new DereferenceJob(entity, 
+                    metadata, writeLock, derefContext);
                 dereferenceJob.setFuture(executor.submit(dereferenceJob));
                 dereferenceJobs.add(dereferenceJob);
             }
@@ -195,7 +235,8 @@ public class EntityDereferenceEngine imp
                             + dereferenceJob.entity + "!", e);
                     } else { //unknown error
                         throw new EngineException(this,ci, "Unchecked Error while "
-                            + "dereferencing Entity " + dereferenceJob.entity +"!", e);
+                            + "dereferencing Entity " + dereferenceJob.entity
+                            + "!", e);
                     }
                 }
             }
@@ -203,7 +244,7 @@ public class EntityDereferenceEngine imp
             for(UriRef entity : referencedEntities){
                 try {
                     log.trace("  ... dereference {}", entity);
-                    if(dereferencer.dereference(entity, metadata, offline, writeLock)){
+                    if(dereferencer.dereference(entity, metadata, writeLock, derefContext)){
                         dereferencedCount++;
                         log.trace("    + success");
                     } else {
@@ -245,19 +286,22 @@ public class EntityDereferenceEngine imp
         final UriRef entity;
         final MGraph metadata;
         final Lock writeLock;
+        final DereferenceContext derefContext;
 
         private Future<Boolean> future;
         
-        DereferenceJob(UriRef entity, MGraph metadata, Lock writeLock){
+        DereferenceJob(UriRef entity, MGraph metadata, Lock writeLock, 
+            DereferenceContext derefContext){
             this.entity = entity;
             this.metadata = metadata;
             this.writeLock = writeLock;
+            this.derefContext = derefContext;
         }
         
         @Override
         public Boolean call() throws DereferenceException {
             log.trace("  ... dereference {}", entity);
-            boolean state = dereferencer.dereference(entity, metadata, offline, writeLock);
+            boolean state = dereferencer.dereference(entity, metadata, writeLock, derefContext);
             if(state){
                 log.trace("    + success");
             } else {

Modified: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java?rev=1547023&r1=1547022&r2=1547023&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java (original)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java Mon Dec  2 14:03:53 2013
@@ -47,7 +47,7 @@ public interface EntityDereferencer {
     
     /**
      * EntityDereferencer can optionally provide an ExecutorService used to
-     * dereference Entities. 
+     * dereference Entities.
      * @return the {@link ExecutorService} or <code>null</code> if not used
      * by this implementation
      */
@@ -58,8 +58,6 @@ public interface EntityDereferencer {
      * data to the parsed graph
      * @param graph the graph to add the dereferenced entity 
      * @param entity the uri of the Entity to dereference
-     * @param offlineMode <code>true</code> if {@link OfflineMode} is active.
-     * Otherwise <code>false</code>
      * @param writeLock The writeLock for the graph. Dereferences MUST require
      * a <code>{@link Lock#lock() writeLock#lock()}</code>  before adding 
      * dereferenced data to the parsed graph. This is essential for using multiple 
@@ -67,11 +65,14 @@ public interface EntityDereferencer {
      * {@link ConcurrentModificationException}s in this implementations or
      * other components (typically other {@link EnhancementEngine}s) accessing the
      * same graph.
+     * @param dereferenceContext Context information for the {@link EntityDereferencer}
+     * such as the {@link OfflineMode} state, possible languages of the content and
+     * requested languages in the Enhancement request.
      * @return if the entity was dereferenced
      * @throws DereferenceException on any error while dereferencing the
      * requested Entity
      */
-    boolean dereference(UriRef entity, MGraph graph, boolean offlineMode, 
-            Lock writeLock) throws DereferenceException;
+    boolean dereference(UriRef entity, MGraph graph, Lock writeLock, 
+            DereferenceContext dereferenceContext) throws DereferenceException;
         
 }

Modified: stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java?rev=1547023&r1=1547022&r2=1547023&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java (original)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java Mon Dec  2 14:03:53 2013
@@ -140,7 +140,8 @@ public class DereferenceEngineTest {
                 return false;
             }  
         };
-        EntityDereferenceEngine engine = new EntityDereferenceEngine("online", onlineDereferencer);
+        EntityDereferenceEngine engine = new EntityDereferenceEngine(onlineDereferencer,
+            new DereferenceEngineConfig("online", false, false));
         //engine in online mode
         Assert.assertNotEquals(engine.canEnhance(ci), EnhancementEngine.CANNOT_ENHANCE);
         //set engine in offline mode
@@ -151,7 +152,8 @@ public class DereferenceEngineTest {
     @Test
     public void testSyncDereferencing() throws Exception {
         ContentItem ci = getContentItem("urn:test:testSyncDereferencing");
-        EntityDereferenceEngine engine = new EntityDereferenceEngine("sync", syncDereferencer);
+        EntityDereferenceEngine engine = new EntityDereferenceEngine(syncDereferencer,
+            new DereferenceEngineConfig("sync", false, false));
         Assert.assertNotEquals(engine.canEnhance(ci), EnhancementEngine.CANNOT_ENHANCE);
         engine.computeEnhancements(ci);
         validateDereferencedEntities(ci.getMetadata());
@@ -160,7 +162,8 @@ public class DereferenceEngineTest {
     @Test
     public void testAsyncDereferencing() throws Exception {
         ContentItem ci = getContentItem("urn:test:testSyncDereferencing");
-        EntityDereferenceEngine engine = new EntityDereferenceEngine("sync", asyncDereferencer);
+        EntityDereferenceEngine engine = new EntityDereferenceEngine(asyncDereferencer,
+            new DereferenceEngineConfig("async", false, false));
         Assert.assertNotEquals(engine.canEnhance(ci), EnhancementEngine.CANNOT_ENHANCE);
         engine.computeEnhancements(ci);
         validateDereferencedEntities(ci.getMetadata());
@@ -201,7 +204,7 @@ public class DereferenceEngineTest {
         }
 
         @Override
-        public boolean dereference(UriRef entity, MGraph graph, boolean offlineMode, Lock writeLock) throws DereferenceException {
+        public boolean dereference(UriRef entity, MGraph graph, Lock writeLock, DereferenceContext context) throws DereferenceException {
             Iterator<Triple> entityTriples = testData.filter(entity, null, null);
             if(entityTriples.hasNext()){
                 writeLock.lock();

Modified: stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/EntityhubDereferenceEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/EntityhubDereferenceEngine.java?rev=1547023&r1=1547022&r2=1547023&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/EntityhubDereferenceEngine.java (original)
+++ stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/EntityhubDereferenceEngine.java Mon Dec  2 14:03:53 2013
@@ -22,7 +22,6 @@ import static org.apache.stanbol.enhance
 import static org.osgi.framework.Constants.SERVICE_RANKING;
 
 import java.util.Dictionary;
-import java.util.Hashtable;
 
 import org.apache.felix.scr.annotations.Activate;
 import org.apache.felix.scr.annotations.Component;
@@ -32,13 +31,13 @@ import org.apache.felix.scr.annotations.
 import org.apache.felix.scr.annotations.Reference;
 import org.apache.felix.scr.annotations.ReferenceCardinality;
 import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
-import org.apache.stanbol.enhancer.engines.dereference.DereferenceUtils;
+import org.apache.stanbol.enhancer.engines.dereference.DereferenceConstants;
+import org.apache.stanbol.enhancer.engines.dereference.DereferenceEngineConfig;
 import org.apache.stanbol.enhancer.engines.dereference.EntityDereferenceEngine;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
 import org.apache.stanbol.entityhub.servicesapi.Entityhub;
 import org.osgi.framework.BundleContext;
-import org.osgi.framework.Constants;
 import org.osgi.framework.ServiceReference;
 import org.osgi.framework.ServiceRegistration;
 import org.osgi.service.cm.ConfigurationException;
@@ -64,6 +63,8 @@ import org.slf4j.LoggerFactory;
 @org.apache.felix.scr.annotations.Properties(value={
     @Property(name=PROPERTY_NAME),
     @Property(name=EntityhubDereferenceEngine.SITE_ID),
+    @Property(name=DereferenceConstants.FILTER_CONTENT_LANGUAGES, 
+    boolValue=DereferenceConstants.DEFAULT_FILTER_CONTENT_LANGUAGES),
     @Property(name=DEREFERENCE_ENTITIES_FIELDS,cardinality=Integer.MAX_VALUE,
     	value={"rdfs:comment","geo:lat","geo:long","foaf:depiction","dbp-ont:thumbnail"}),
     @Property(name=DEREFERENCE_ENTITIES_LDPATH, cardinality=Integer.MAX_VALUE),
@@ -120,35 +121,10 @@ public class EntityhubDereferenceEngine 
         bundleContext = ctx.getBundleContext();
         log.info("> activate {}",getClass().getSimpleName());
         //get the metadata later set to the enhancement engine
-        String engineName;
-        engineMetadata = new Hashtable<String,Object>();
-        Object value = properties.get(PROPERTY_NAME);
-        if(value == null || value.toString().isEmpty()){
-            throw new ConfigurationException(PROPERTY_NAME, "The EnhancementEngine name MUST BE configured!");
-        } else {
-            engineName = value.toString().trim();
-        }
-        log.debug(" - engineName: {}",engineName);
-        engineMetadata.put(PROPERTY_NAME, engineName);
-        value = properties.get(SERVICE_RANKING);
-        Integer serviceRanking = null;
-        if(value instanceof Number){
-            serviceRanking = ((Number)value).intValue();
-        } else if(value != null){
-            try {
-                serviceRanking = Integer.parseInt(value.toString());
-            } catch(NumberFormatException e){
-                throw new ConfigurationException(SERVICE_RANKING, "Parsed service ranking '"
-                        + value + "' (type: " + value.getClass().getName()
-                        + "' can not be converted to an integer value!", e);
-            }
-        } //else not defined
-        if(serviceRanking != null){
-            log.debug(" - service.ranking: {}", serviceRanking);
-            engineMetadata.put(Constants.SERVICE_RANKING, serviceRanking);
-        }
+        DereferenceEngineConfig engineConfig = new DereferenceEngineConfig(properties);
+        log.debug(" - engineName: {}", engineConfig.getEngineName());
         //parse the Entityhub Site used for dereferencing
-        value = properties.get(SITE_ID);
+        Object value = properties.get(SITE_ID);
         //init the EntitySource
         if (value == null) {
             siteName = "*"; //all referenced sites
@@ -174,12 +150,9 @@ public class EntityhubDereferenceEngine 
         //set the namespace prefix service to the dereferencer
         entityDereferencer.setNsPrefixService(prefixService);
         //now parse dereference field config
-        entityDereferencer.setDereferencedFields(
-            DereferenceUtils.parseDereferencedFieldsConfig(properties));
-        //create the engine
-        entityDereferencer.setLdPath(
-            DereferenceUtils.parseLdPathConfig(properties));
-        entityDereferenceEngine = new EntityDereferenceEngine(engineName, entityDereferencer);
+        entityDereferencer.setDereferencedFields(engineConfig.getDereferenceFields());
+        entityDereferencer.setLdPath(engineConfig.getLdPathProgram());
+        entityDereferenceEngine = new EntityDereferenceEngine(entityDereferencer, engineConfig);
         //NOTE: registration of this instance as OSGI service is done as soon as the
         //      entityhub service backing the entityDereferencer is available.
         
@@ -217,7 +190,7 @@ public class EntityhubDereferenceEngine 
                         new String[]{EnhancementEngine.class.getName(),
                                      ServiceProperties.class.getName()},
                     entityDereferenceEngine,
-                    engineMetadata);
+                    entityDereferenceEngine.getConfig().getDict());
                     
                 }
                 trackedServiceCount++;

Modified: stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/TrackingDereferencerBase.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/TrackingDereferencerBase.java?rev=1547023&r1=1547022&r2=1547023&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/TrackingDereferencerBase.java (original)
+++ stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/java/org/apache/stanbol/enhancer/engines/dereference/entityhub/TrackingDereferencerBase.java Mon Dec  2 14:03:53 2013
@@ -20,6 +20,8 @@ import java.io.StringReader;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
 import java.util.IdentityHashMap;
 import java.util.Iterator;
 import java.util.List;
@@ -28,12 +30,14 @@ import java.util.Map.Entry;
 import java.util.Set;
 import java.util.concurrent.locks.Lock;
 
+import org.apache.clerezza.rdf.core.Language;
 import org.apache.clerezza.rdf.core.MGraph;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
 import org.apache.commons.lang.StringUtils;
 import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
 import org.apache.stanbol.enhancer.engines.dereference.DereferenceConstants;
+import org.apache.stanbol.enhancer.engines.dereference.DereferenceContext;
 import org.apache.stanbol.enhancer.engines.dereference.DereferenceException;
 import org.apache.stanbol.enhancer.engines.dereference.EntityDereferencer;
 import org.apache.stanbol.entityhub.core.mapping.DefaultFieldMapperImpl;
@@ -48,9 +52,11 @@ import org.apache.stanbol.entityhub.serv
 import org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapper;
 import org.apache.stanbol.entityhub.servicesapi.mapping.FieldMapping;
 import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.Text;
 import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
 import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
 import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
+import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
 import org.osgi.framework.BundleContext;
 import org.osgi.framework.Filter;
 import org.osgi.framework.InvalidSyntaxException;
@@ -252,7 +258,7 @@ public abstract class TrackingDereferenc
     }
     
     @Override
-    public final boolean dereference(UriRef uri, MGraph graph, boolean offlineMode, Lock writeLock) throws DereferenceException {
+    public final boolean dereference(UriRef uri, MGraph graph, Lock writeLock, DereferenceContext derefContext) throws DereferenceException {
         T service = getService();
         if(service == null){
             throw new DereferenceException(uri, serviceClass.getClass().getSimpleName() 
@@ -260,19 +266,24 @@ public abstract class TrackingDereferenc
         }
         Representation rep;
         try {
-            rep = getRepresentation(service, uri.getUnicodeString(), offlineMode);
+            rep = getRepresentation(service, uri.getUnicodeString(), derefContext.isOfflineMode());
         } catch(EntityhubException e){
             throw new DereferenceException(uri,e);
         }
+        //we need the languages as strings
+        final Set<String> langs = derefContext.getLanguages();
+        
         if(rep != null){
-            if(fieldMapper == null && ldpathProgram == null){
+            if(fieldMapper == null && ldpathProgram == null && langs.isEmpty()){
                 copyAll(uri, rep, graph, writeLock);
-            } else {
-                if(fieldMapper != null){
-                    copyMapped(uri, rep, graph, writeLock);
+            } else { //we need to apply some filters while dereferencing
+                if(fieldMapper != null || !langs.isEmpty()){
+                    //this considers specified fields and included languages
+                    copyMapped(uri, rep, langs, graph, writeLock);
                 }
                 if(ldpathProgram != null){
-                    copyLdPath(uri, getRdfBackend(service), graph, writeLock);
+                    //this executes LDPath statements
+                    copyLdPath(uri, getRdfBackend(service), langs, graph, writeLock);
                 }
             }
             return true;
@@ -285,13 +296,14 @@ public abstract class TrackingDereferenc
      * writes the the results to the parsed Graph
      * @param uri the context
      * @param rdfBackend the RdfBackend the LDPath program is executed on
+     * @param langs the set of languages to dereference
      * @param graph the graph to store the results
      * @param writeLock the write lock for the graph
      * @throws DereferenceException on any {@link EntityhubException} while
      * executing the LDPath program
      */
-    protected void copyLdPath(UriRef uri, RDFBackend<Object> rdfBackend, 
-            MGraph graph, Lock writeLock) throws DereferenceException {
+    private void copyLdPath(UriRef uri, RDFBackend<Object> rdfBackend, 
+            Set<String> langs, MGraph graph, Lock writeLock) throws DereferenceException {
         //A RdfReference needs to be used as context
         RdfReference context = valueFactory.createReference(uri);
         //create the representation that stores results in an intermediate
@@ -303,13 +315,23 @@ public abstract class TrackingDereferenc
             for(at.newmedialab.ldpath.model.fields.FieldMapping<?,Object> mapping : ldpathProgram.getFields()) {
                 Collection<?> values = mapping.getValues(rdfBackend, context);
                 if(values != null && !values.isEmpty()){
-                    result.add(mapping.getFieldName(),values);
+                    String fieldName = mapping.getFieldName();
+                    if(langs.isEmpty()){
+                        result.add(fieldName,values);
+                    } else { //filter for languages
+                        for(Object value : values){
+                            if((!(value instanceof Text)) || 
+                                    langs.contains(((Text)value).getLanguage())){
+                                result.add(fieldName, value);
+                            } //else text with filtered language ... do not add
+                        }
+                    }
                 }
             }
         } catch (EntityhubException e){
             throw new DereferenceException(uri, e);
         }
-        if(!ldPathResults.isEmpty()){ //copy the resutls
+        if(!ldPathResults.isEmpty()){ //copy the results
             writeLock.lock();
             try {
                 graph.addAll(ldPathResults);
@@ -340,10 +362,27 @@ public abstract class TrackingDereferenc
      * in the graph
      * @param uri the uri of the entity to dereference
      * @param rep the data for the entity as in the entityhub
+     * @param langs the set of languages to dereference
      * @param graph the graph to store the mapping results
      * @param writeLock the write lock for the graph
      */
-    private void copyMapped(UriRef uri, Representation rep, MGraph graph, Lock writeLock) {
+    private void copyMapped(UriRef uri, Representation rep, Set<String> langs, 
+            MGraph graph, Lock writeLock) {
+        //init the fieldMapper
+        FieldMapper fieldMapper;
+        if(!langs.isEmpty()){ //if we need to filter for specific languages
+            //we need to clone the fieldMapper and add a global filter for the
+            //languages. NOTE that the fieldMapper might be null. In that case
+            //we just need to filter literals by language
+            //TODO: maybe cache fieldMappers for sets of languages
+            fieldMapper = this.fieldMapper != null ? this.fieldMapper.clone() :
+                new DefaultFieldMapperImpl(ValueConverterFactory.getDefaultInstance());
+            fieldMapper.addMapping(new FieldMapping(new TextConstraint(
+                (String)null, langs.toArray(new String[graph.size()]))));
+        } else { //just use the fieldMapper as parsed in the config
+            fieldMapper = this.fieldMapper;
+        }
+        //execute the field mappings
         writeLock.lock();
         try {
             RdfRepresentation clerezzaRep = valueFactory.createRdfRepresentation(uri, graph);

Modified: stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1547023&r1=1547022&r2=1547023&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ stanbol/trunk/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties Mon Dec  2 14:03:53 2013
@@ -22,6 +22,12 @@ service.ranking.description=If two enhan
 one with the higher ranking will be used to process parsed content items.
 
 
+org.apache.stanbol.enhancer.engines.dereference.entityhub.EntityhubDereferenceEngine.name=Apache \
+Stanbol Enhancer Engine: Entityhub Dereference
+org.apache.stanbol.enhancer.engines.dereference.entityhub.EntityhubDereferenceEngine.description=Enhancement \
+Engine that dereferences Entities by using the Stanbol Entityhub. This engine supports single Sites, \
+SiteManager as well as the Entityhub itself.
+
 #===============================================================================
 #Properties and Options used to configure 
 #===============================================================================
@@ -38,3 +44,7 @@ information to be included for dereferen
 dereference rules. As an example 'schema:name = .[rdf:type is foaf:Person]/fn:concat(\
 foaf:given," ",foaf:surname)' would construct the schema:name value by concatenation \
 of the foaf:given and foaf:surname if the entity is of type foaf:Person.  
+
+enhancer.engines.dereference.entityhub.siteId.name=Site
+enhancer.engines.dereference.entityhub.siteId.description=The name of the Entityhub Site, \
+'entityhub' for the Entityhub or '*' to dereference Entities using all active sites.