You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/11/25 13:09:16 UTC

svn commit: r1545240 - in /stanbol/trunk/enhancement-engines: ./ dereference/ dereference/core/ dereference/core/src/ dereference/core/src/license/ dereference/core/src/main/ dereference/core/src/main/java/ dereference/core/src/main/java/org/ dereferen...

Author: rwesten
Date: Mon Nov 25 12:09:15 2013
New Revision: 1545240

URL: http://svn.apache.org/r1545240
Log:
STANBOL-336, STANBOL-1222: EntityDereference core module (first commit)

Added:
    stanbol/trunk/enhancement-engines/dereference/
    stanbol/trunk/enhancement-engines/dereference/core/   (with props)
    stanbol/trunk/enhancement-engines/dereference/core/pom.xml
    stanbol/trunk/enhancement-engines/dereference/core/src/
    stanbol/trunk/enhancement-engines/dereference/core/src/license/
    stanbol/trunk/enhancement-engines/dereference/core/src/license/THIRD-PARTY.properties
    stanbol/trunk/enhancement-engines/dereference/core/src/main/
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceException.java
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java
    stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java
    stanbol/trunk/enhancement-engines/dereference/core/src/main/resources/
    stanbol/trunk/enhancement-engines/dereference/core/src/test/
    stanbol/trunk/enhancement-engines/dereference/core/src/test/java/
    stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/
    stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/
    stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/
    stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/
    stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/
    stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/
    stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java
    stanbol/trunk/enhancement-engines/dereference/core/src/test/resources/
Modified:
    stanbol/trunk/enhancement-engines/pom.xml

Propchange: stanbol/trunk/enhancement-engines/dereference/core/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Nov 25 12:09:15 2013
@@ -0,0 +1,7 @@
+target
+
+.project
+
+.settings
+
+.classpath

Added: stanbol/trunk/enhancement-engines/dereference/core/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/pom.xml?rev=1545240&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/pom.xml (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/pom.xml Mon Nov 25 12:09:15 2013
@@ -0,0 +1,122 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.stanbol</groupId>
+    <artifactId>apache-stanbol-enhancement-engines-entitylinking</artifactId>
+    <version>1.0.0-SNAPSHOT</version>
+    <relativePath>..</relativePath>
+  </parent>
+
+  <artifactId>org.apache.stanbol.enhancer.engines.dereference.core</artifactId>
+  <packaging>bundle</packaging>
+
+  <name>Apache Stanbol Enhancement Engine : Entity Dereference Core</name>
+  <description>
+    Implementation of an Entity Dereferencing Engine defining an Extension point
+    for actual dereferencing implementations.
+  </description>
+
+  <inceptionYear>2013</inceptionYear>
+
+  <scm>
+    <connection>
+      scm:svn:http://svn.apache.org/repos/asf/stanbol/trunk/enhancement-engines/dereference/core/
+    </connection>
+    <developerConnection>
+      scm:svn:https://svn.apache.org/repos/asf/stanbol/trunk/enhancement-engines/dereference/core/
+    </developerConnection>
+    <url>http://stanbol.apache.org/</url>
+  </scm>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <extensions>true</extensions>
+        <configuration>
+          <instructions>
+            <Import-Package>
+              org.apache.stanbol.enhancer.servicesapi; provide:=true; version="[0.11,1.1)",
+              org.apache.stanbol.enhancer.engines.dereference; provide:=true,
+              *
+            </Import-Package>
+            <Export-Package>
+              org.apache.stanbol.enhancer.engines.dereference;version=${project.version}
+            </Export-Package>
+            <!-- Private-Package>
+            </Private-Package -->
+          </instructions>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-scr-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.commons.stanboltools.offline</artifactId>
+      <version>1.0.0-SNAPSHOT</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+      <version>1.0.0-SNAPSHOT</version>
+    </dependency>
+
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+
+    <!-- Testing -->
+     <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+      <version>1.0.0-SNAPSHOT</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>      
+    </dependency>
+    <dependency>  <!-- used for debug level logging during tests -->
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-simple</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+  </dependencies>
+
+</project>

Added: stanbol/trunk/enhancement-engines/dereference/core/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/license/THIRD-PARTY.properties?rev=1545240&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/license/THIRD-PARTY.properties (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/license/THIRD-PARTY.properties Mon Nov 25 12:09:15 2013
@@ -0,0 +1,24 @@
+# Generated by org.codehaus.mojo.license.AddThirdPartyMojo
+#-------------------------------------------------------------------------------
+# Already used licenses in project :
+# - Apache Software License
+# - Apache Software License, Version 2.0
+# - BSD License
+# - Common Development And Distribution License (CDDL), Version 1.0
+# - Common Development And Distribution License (CDDL), Version 1.1
+# - Common Public License, Version 1.0
+# - Eclipse Public License, Version 1.0
+# - GNU General Public License (GPL), Version 2 with classpath exception
+# - GNU Lesser General Public License (LGPL)
+# - GNU Lesser General Public License (LGPL), Version 2.1
+# - ICU License
+# - MIT License
+# - New BSD License
+# - Public Domain License
+#-------------------------------------------------------------------------------
+# Please fill the missing licenses for dependencies :
+#
+#
+#Mon Nov 25 13:04:19 CET 2013
+org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
+org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0

Added: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceException.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceException.java?rev=1545240&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceException.java (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceException.java Mon Nov 25 12:09:15 2013
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dereference;
+
+import org.apache.clerezza.rdf.core.UriRef;
+
+/**
+ * Checked exception signalling that an Entity could not be dereferenced.
+ * The message names the affected Entity and the original problem is kept
+ * as the cause.
+ */
+public class DereferenceException extends Exception {
+
+    private static final long serialVersionUID = 1524436328783083428L;
+
+    /**
+     * Creates the exception for the Entity that failed to dereference.
+     * @param entity the URI of the Entity (included in the message)
+     * @param t the underlying problem, preserved as the cause
+     */
+    public DereferenceException(UriRef entity, Throwable t){
+        super(String.format("Unable to dereference Entity %s!", entity), t);
+    }
+
+}

Added: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java?rev=1545240&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java Mon Nov 25 12:09:15 2013
@@ -0,0 +1,281 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dereference;
+
+import static org.apache.stanbol.enhancer.servicesapi.ServiceProperties.ENHANCEMENT_ENGINE_ORDERING;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
+
+import java.io.IOError;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.concurrent.locks.Lock;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.commons.lang.StringUtils;
+import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@link EnhancementEngine} that collects every {@link UriRef} referenced via
+ * {@link Properties#ENHANCER_ENTITY_REFERENCE} in the metadata of a
+ * {@link ContentItem} and asks the configured {@link EntityDereferencer} to
+ * dereference those Entities into the metadata graph.
+ */
+public class EntityDereferenceEngine implements EnhancementEngine, ServiceProperties {
+
+    private final Logger log = LoggerFactory.getLogger(EntityDereferenceEngine.class);
+
+    /**
+     * By default the EntityDereferenceEngine does use {@link ServiceProperties#ORDERING_POST_PROCESSING}.
+     * Note that this value is only written to the service properties when
+     * {@link #setEngineOrdering(Integer)} is called with <code>null</code>; the
+     * constructor does not register it.
+     */
+    public static final int DEFAULT_ENGINE_ORDERING = ServiceProperties.ORDERING_POST_PROCESSING;
+
+    /**
+     * If the offline mode is enforced for dereferencing Entities
+     */
+    private boolean offline;
+
+    /** The dereferencer all requests are delegated to (never <code>null</code>). */
+    protected final EntityDereferencer dereferencer;
+
+    /** The name of this engine as returned by {@link #getName()} (never blank). */
+    protected final String name;
+
+    /**
+     * The Map holding the {@link #serviceProperties} for this engine.
+     */
+    protected final Map<String,Object> serviceProperties = new HashMap<String,Object>();
+    /**
+     * Unmodifiable view over {@link #serviceProperties} returned by
+     * {@link #getServiceProperties()}
+     */
+    private final Map<String,Object> unmodServiceProperties = Collections.unmodifiableMap(serviceProperties);
+
+    /**
+     * Creates an engine with the parsed name that uses the parsed dereferencer.
+     * @param name the name of the engine. MUST NOT be <code>null</code> nor blank
+     * @param dereferencer the dereferencer. MUST NOT be <code>null</code>
+     * @throws IllegalArgumentException if one of the two parameters is invalid
+     */
+    public EntityDereferenceEngine(String name, EntityDereferencer dereferencer){
+        if(StringUtils.isBlank(name)){
+            throw new IllegalArgumentException("The parsed EnhancementEngine name MUST NOT be NULL nor empty!");
+        }
+        this.name = name;
+        if(dereferencer == null){
+            throw new IllegalArgumentException("The parsed EntityDereferencer MUST NOT be NULL!");
+        }
+        this.dereferencer = dereferencer;
+    }
+
+    /**
+     * Setter for the offline mode. This method is typically called if
+     * {@link OfflineMode} is injected to the component registering an instance
+     * of this Engine implementation
+     * @param mode the offline mode
+     */
+    public void setOfflineMode(boolean mode){
+        this.offline = mode;
+    }
+
+    /**
+     * Getter for the offline mode
+     * @return the value last set by {@link #setOfflineMode(boolean)}
+     * (<code>false</code> if never set)
+     */
+    public boolean isOfflineMode(){
+        return offline;
+    }
+    /**
+     * Setter for the {@link ServiceProperties#ENHANCEMENT_ENGINE_ORDERING
+     * engine ordering}.
+     * @param ordering The ordering or <code>null</code> to set the 
+     * {@value #DEFAULT_ENGINE_ORDERING default} for this engine.
+     */
+    public void setEngineOrdering(Integer ordering){
+        serviceProperties.put(ServiceProperties.ENHANCEMENT_ENGINE_ORDERING, 
+            ordering == null ? DEFAULT_ENGINE_ORDERING : ordering);
+    }
+
+    /**
+     * Getter for the {@link ServiceProperties#ENHANCEMENT_ENGINE_ORDERING
+     * engine ordering}.
+     * @return the ordering stored in the service properties or
+     * <code>null</code> if {@link #setEngineOrdering(Integer)} was not yet called
+     */
+    public Integer getEngineOrdering(){
+        return (Integer)serviceProperties.get(ENHANCEMENT_ENGINE_ORDERING);
+    }
+
+    /**
+     * @return an unmodifiable view over the service properties of this engine
+     */
+    @Override
+    public Map<String,Object> getServiceProperties() {
+        return unmodServiceProperties;
+    }
+
+    /**
+     * Checks if this engine can process ContentItems. The parsed ContentItem
+     * itself is not inspected.
+     * @return {@link #CANNOT_ENHANCE} if the offline mode is active and the
+     * dereferencer does not support it. Otherwise {@link #ENHANCE_ASYNC}
+     */
+    @Override
+    public int canEnhance(ContentItem ci) throws EngineException {
+        if(offline && !dereferencer.supportsOfflineMode()){
+            return CANNOT_ENHANCE;
+        } else {
+            return ENHANCE_ASYNC;
+        }
+    }
+
+    /**
+     * Dereferences all Entities referenced by the metadata of the parsed
+     * {@link ContentItem}. Returns without doing anything if the offline mode
+     * is active and not supported by the dereferencer.
+     * <p>
+     * The referenced Entities are collected while holding the read lock of
+     * the ContentItem. If the {@link EntityDereferencer} provides an
+     * {@link ExecutorService} that is not shut down, one {@link DereferenceJob}
+     * per Entity is submitted and awaited. Otherwise the Entities are
+     * dereferenced one after the other in the calling thread. Entities failing
+     * with a {@link DereferenceException} are counted and logged but do not
+     * abort the processing of the remaining Entities.
+     * @param ci the ContentItem to process
+     * @throws EngineException if the calling thread is interrupted while
+     * waiting for a dereference job or if a job fails with anything other
+     * than a {@link DereferenceException}
+     */
+    @Override
+    public void computeEnhancements(ContentItem ci) throws EngineException {
+        if(offline && !dereferencer.supportsOfflineMode()){
+            //entity dereferencer does no longer support offline mode
+            return;
+        }
+        log.debug("> dereference Entities for ContentItem {}", ci.getUri());
+        final MGraph metadata = ci.getMetadata();
+        Set<UriRef> referencedEntities = new HashSet<UriRef>();
+        //(1) read all Entities we need to dereference from the parsed contentItem
+        ci.getLock().readLock().lock();
+        try {
+            Iterator<Triple> entityReferences = metadata.filter(null, ENHANCER_ENTITY_REFERENCE, null);
+            while(entityReferences.hasNext()){
+                Triple triple = entityReferences.next();
+                Resource entityReference = triple.getObject();
+                if(entityReference instanceof UriRef){
+                    boolean added = referencedEntities.add((UriRef)entityReference);
+                    if(added && log.isTraceEnabled()){
+                        log.trace("  ... schedule Entity {}", entityReference);
+                    }
+                } else if(log.isWarnEnabled()){
+                    //log enhancement that use a fise:entity-reference with a non UriRef value!
+                    NonLiteral enhancement = triple.getSubject();
+                    log.warn("Can not dereference invalid Enhancement {}",enhancement);
+                    for(Iterator<Triple> it = metadata.filter(enhancement, null, null);it.hasNext();){
+                        log.warn("   {}", it.next());
+                    }
+                }
+            }
+        } finally {
+            ci.getLock().readLock().unlock();
+        }
+        //the write lock is handed to the dereferencer, which has to acquire
+        //it before adding data to the metadata
+        final Lock writeLock = ci.getLock().writeLock();
+        log.trace(" - scheduled {} Entities for dereferencing", referencedEntities.size());
+        //(2) dereference the Entities
+        ExecutorService executor = dereferencer.getExecutor();
+        long start = System.currentTimeMillis();
+        Set<UriRef> failedEntities = new HashSet<UriRef>();
+        int dereferencedCount = 0;
+        List<DereferenceJob> dereferenceJobs = new ArrayList<DereferenceJob>(referencedEntities.size());
+        if(executor != null && !executor.isShutdown()){ //dereference using executor
+            //schedule all entities to dereference
+            for(final UriRef entity : referencedEntities){
+                DereferenceJob dereferenceJob = new DereferenceJob(entity, metadata, writeLock);
+                dereferenceJob.setFuture(executor.submit(dereferenceJob));
+                dereferenceJobs.add(dereferenceJob);
+            }
+            //wait for all entities to be dereferenced
+            for(DereferenceJob dereferenceJob : dereferenceJobs){
+                try {
+                    if(dereferenceJob.await()){
+                        dereferencedCount++;
+                    }
+                } catch (InterruptedException e) {
+                    // Restore the interrupted status
+                    Thread.currentThread().interrupt();
+                    throw new EngineException(this, ci, 
+                        "Interupted while waiting for dereferencing Entities", e);
+                } catch (ExecutionException e) {
+                    if(e.getCause() instanceof DereferenceException){
+                        failedEntities.add(dereferenceJob.entity);
+                        log.debug(" ... error while dereferencing " 
+                            + dereferenceJob.entity + "!", e);
+                    } else { //unknown error
+                        throw new EngineException(this,ci, "Unchecked Error while "
+                            + "dereferencing Entity " + dereferenceJob.entity +"!", e);
+                    }
+                }
+            }
+        } else { //dereference using the current thread
+            for(UriRef entity : referencedEntities){
+                try {
+                    log.trace("  ... dereference {}", entity);
+                    if(dereferencer.dereference(entity, metadata, offline, writeLock)){
+                        dereferencedCount++;
+                        log.trace("    + success");
+                    } else {
+                        log.trace("    - not found");
+                    }
+                } catch (DereferenceException e) {
+                    log.debug(" ... error while dereferencing " + entity + "!", e);
+                    failedEntities.add(entity);
+                }
+            }
+        }
+        long duration = System.currentTimeMillis() - start;
+        if(!failedEntities.isEmpty()){
+            log.warn(" - unable to dereference {} of {} for ContentItem {}",
+                new Object[] {failedEntities.size(),referencedEntities.size(), 
+                    ci.getUri()});
+        }
+        if(log.isDebugEnabled()){
+            //the division uses integral operands and would throw an
+            //ArithmeticException if no Entity was dereferenced
+            float timePerEntity = dereferencedCount == 0 ? 0f :
+                (duration*100/dereferencedCount)/100.0f;
+            log.debug(" - dereferenced {} of {} Entities in {}ms ({}ms/dereferenced)", 
+                new Object[]{dereferencedCount, referencedEntities.size(),
+                    duration, timePerEntity});
+        }
+
+    }
+
+    /**
+     * @return the name parsed to the constructor
+     */
+    @Override
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * Used both as {@link Callable} submitted to the {@link ExecutorService}
+     * and as object to {@link #await()} the completion of the task.
+     * @author Rupert Westenthaler
+     *
+     */
+    class DereferenceJob implements Callable<Boolean> {
+
+        /** The Entity dereferenced by this job */
+        final UriRef entity;
+        /** The graph parsed to the dereferencer */
+        final MGraph metadata;
+        /** The write lock parsed to the dereferencer */
+        final Lock writeLock;
+
+        /** Set via {@link #setFuture(Future)} after the job was submitted */
+        private Future<Boolean> future;
+
+        DereferenceJob(UriRef entity, MGraph metadata, Lock writeLock){
+            this.entity = entity;
+            this.metadata = metadata;
+            this.writeLock = writeLock;
+        }
+
+        /**
+         * Delegates to the dereferencer of the enclosing engine.
+         * @return the state returned by the dereferencer
+         * @throws DereferenceException as thrown by the dereferencer
+         */
+        @Override
+        public Boolean call() throws DereferenceException {
+            log.trace("  ... dereference {}", entity);
+            boolean state = dereferencer.dereference(entity, metadata, offline, writeLock);
+            if(state){
+                log.trace("    + success");
+            } else {
+                log.trace("    - not found");
+            }
+            return state;
+        }
+
+        /**
+         * Sets the future returned when submitting this job. Needs to be
+         * called before {@link #await()}.
+         */
+        void setFuture(Future<Boolean> future){
+            this.future = future;
+        }
+
+        /**
+         * Waits for the completion of this job.
+         * @return the result of {@link #call()}
+         * @throws InterruptedException if interrupted while waiting
+         * @throws ExecutionException if {@link #call()} has thrown an exception
+         */
+        public boolean await() throws InterruptedException, ExecutionException {
+            return future.get();
+        }
+    }
+
+}

Added: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java?rev=1545240&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java Mon Nov 25 12:09:15 2013
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dereference;
+
+import java.util.ConcurrentModificationException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.locks.Lock;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+
+/**
+ * Interface used by the {@link EntityDereferenceEngine} to dereference
+ * Entities
+ * 
+ * @author Rupert Westenthaler
+ *
+ */
+public interface EntityDereferencer {
+
+    /**
+     * If this EntityDereferencer can dereference Entities when in 
+     * {@link OfflineMode}. This method is expected to only return <code>false</code>
+     * when an implementation can not dereference any Entities when in offline
+     * mode. If some (e.g. locally cached) Entities can be dereferenced
+     * the dereferencer should return <code>true</code> and just ignore calls
+     * for Entities that are not locally available.
+     * @return <code>true</code> if Entities can be dereferenced while in
+     * {@link OfflineMode}
+     */
+    boolean supportsOfflineMode();
+    
+    /**
+     * EntityDereferencer can optionally provide an ExecutorService used to
+     * dereference Entities. 
+     * @return the {@link ExecutorService} or <code>null</code> if not used
+     * by this implementation
+     */
+    ExecutorService getExecutor();
+
+    /**
+     * Dereferences the Entity with the parsed {@link UriRef} by copying the
+     * data to the parsed graph
+     * @param entity the uri of the Entity to dereference
+     * @param graph the graph to add the dereferenced entity 
+     * @param offlineMode <code>true</code> if {@link OfflineMode} is active.
+     * Otherwise <code>false</code>
+     * @param writeLock The writeLock for the graph. Dereferencers MUST acquire
+     * a <code>{@link Lock#lock() writeLock#lock()}</code>  before adding 
+     * dereferenced data to the parsed graph. This is essential for using multiple 
+     * threads  to dereference Entities. Failing to do so will cause
+     * {@link ConcurrentModificationException}s in this implementation or
+     * other components (typically other {@link EnhancementEngine}s) accessing the
+     * same graph.
+     * @return if the entity was dereferenced
+     * @throws DereferenceException on any error while dereferencing the
+     * requested Entity
+     */
+    boolean dereference(UriRef entity, MGraph graph, boolean offlineMode, 
+            Lock writeLock) throws DereferenceException;
+        
+}

Added: stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java?rev=1545240&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java Mon Nov 25 12:09:15 2013
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dereference;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.SKOS_CONCEPT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDFS_LABEL;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.Random;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.locks.Lock;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.TripleCollection;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.stanbol.commons.indexedgraph.IndexedMGraph;
+import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
+import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
+import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tests the {@link EntityDereferenceEngine} against an in-memory
+ * {@link EntityDereferencer} that serves randomly generated test entities.
+ * Both the synchronous (no {@link ExecutorService}) and the asynchronous
+ * (thread pool backed) dereferencing modes are covered.
+ * <p>
+ * TODO: convert this to an integration test!
+ * @author Rupert Westenthaler
+ */
+public class DereferenceEngineTest {
+
+    private final static Logger log = LoggerFactory.getLogger(DereferenceEngineTest.class);
+
+    //TODO: test implementations of EntityDereferencer
+    static EntityDereferencer asyncDereferencer = new TestDereferencer(Executors.newFixedThreadPool(4));
+    static EntityDereferencer syncDereferencer = new TestDereferencer(null);
+
+    /**
+     * The entity data served by the {@link TestDereferencer}
+     */
+    private static TripleCollection testData;
+
+    /**
+     * The metadata (entity references) copied to every {@link ContentItem}
+     * created by {@link #getContentItem(String)}
+     */
+    private static TripleCollection testMetadata;
+
+    public static final UriRef NAME = new UriRef(NamespaceEnum.rdfs+"label");
+    public static final UriRef TYPE = new UriRef(NamespaceEnum.rdf+"type");
+    public static final UriRef REDIRECT = new UriRef(NamespaceEnum.rdfs+"seeAlso");
+
+    private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
+
+    private static final LiteralFactory lf = LiteralFactory.getInstance();
+    private static final UriRef SKOS_NOTATION = new UriRef(NamespaceEnum.skos+"notation");
+    private static final Language LANG_EN = new Language("en");
+    private static final Language LANG_DE = new Language("de");
+
+    /** The number of entity ids used to generate the test data */
+    private static final int NUM_ENTITIES = 1000;
+
+    /** Share of the entity ids that get referenced by an enhancement */
+    public static final float PERCENTAGE_LINKED = 0.3f;
+    /** Share of the entity ids for which entity data are created */
+    public static final float PERCENTAGE_PRESENT = 0.9f;
+
+    /**
+     * Creates the test data: for about {@link #PERCENTAGE_PRESENT} of the
+     * {@link #NUM_ENTITIES} entity ids an entity is added to the test data and
+     * for about {@link #PERCENTAGE_LINKED} of the ids an enhancement
+     * referencing the entity is added to the test metadata. Both selections
+     * are independent, so some referenced entities have no data to
+     * dereference. The random seed is logged to help analysing failed runs.
+     */
+    @BeforeClass
+    public static void setUpServices() throws IOException {
+        testData = new IndexedMGraph();
+        long seed = System.currentTimeMillis();
+        log.info("Test seed {}", seed);
+        Random random = new Random(seed);
+        int numEntities = 0;
+        for(int i = 0; i < NUM_ENTITIES ; i++){
+            if(random.nextFloat() <= PERCENTAGE_PRESENT){ //do not create all entities
+                UriRef uri = new UriRef("urn:test:entity"+i);
+                testData.add(new TripleImpl(uri, RDF_TYPE, SKOS_CONCEPT));
+                testData.add(new TripleImpl(uri, RDFS_LABEL,
+                    new PlainLiteralImpl("entity "+i, LANG_EN)));
+                testData.add(new TripleImpl(uri, RDFS_LABEL,
+                    new PlainLiteralImpl("Entity "+i, LANG_DE)));
+                testData.add(new TripleImpl(uri, SKOS_NOTATION,
+                    lf.createTypedLiteral(i)));
+                numEntities++;
+            }
+        }
+        log.info(" ... created {} Entities",numEntities);
+        testMetadata = new IndexedMGraph();
+        int numLinks = 0;
+        for(int i = 0; i < NUM_ENTITIES ; i++){
+            if(random.nextFloat() < PERCENTAGE_LINKED){
+                UriRef enhancementUri = new UriRef("urn:test:enhancement"+i);
+                UriRef entityUri = new UriRef("urn:test:entity"+i);
+                //we do not need any other triple for testing in the contentItem
+                testMetadata.add(new TripleImpl(enhancementUri, ENHANCER_ENTITY_REFERENCE, entityUri));
+                numLinks++;
+            }
+        }
+        log.info(" ... created {} Entity references ", numLinks);
+    }
+
+    /**
+     * Shuts down the thread pool used by the {@link #asyncDereferencer} so
+     * that its (non-daemon) worker threads do not outlive this test class.
+     */
+    @AfterClass
+    public static void shutdownExecutor() {
+        ExecutorService executor = asyncDereferencer.getExecutor();
+        if(executor != null){
+            executor.shutdown();
+        }
+    }
+
+    /**
+     * Creates a {@link ContentItem} with the passed id that holds a copy of
+     * the test metadata (the entity references to dereference).
+     * @param id the id (uri) of the content item
+     * @return the content item
+     * @throws IOException on any error while creating the content item
+     */
+    public static ContentItem getContentItem(final String id) throws IOException {
+        ContentItem ci = ciFactory.createContentItem(new UriRef(id), new StringSource("Not used"));
+        ci.getMetadata().addAll(testMetadata);
+        return ci;
+    }
+
+    /**
+     * Test {@link OfflineMode} functionality: an engine using a dereferencer
+     * that does not support offline mode must refuse to enhance as soon as
+     * offline mode is activated.
+     * @throws Exception
+     */
+    @Test
+    public void testOfflineMode() throws Exception {
+        ContentItem ci = getContentItem("urn:test:testOfflineMode");
+        //a dereferencer that requires online access
+        EntityDereferencer onlineDereferencer = new TestDereferencer(null){
+            @Override
+            public boolean supportsOfflineMode() {
+                return false;
+            }
+        };
+        EntityDereferenceEngine engine = new EntityDereferenceEngine("online", onlineDereferencer);
+        //engine in online mode
+        Assert.assertNotEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
+        //set engine in offline mode
+        engine.setOfflineMode(true);
+        Assert.assertEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
+    }
+
+    /**
+     * Dereferences the test entities with a dereferencer that does not
+     * provide an {@link ExecutorService}.
+     * @throws Exception
+     */
+    @Test
+    public void testSyncDereferencing() throws Exception {
+        ContentItem ci = getContentItem("urn:test:testSyncDereferencing");
+        EntityDereferenceEngine engine = new EntityDereferenceEngine("sync", syncDereferencer);
+        Assert.assertNotEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
+        engine.computeEnhancements(ci);
+        validateDereferencedEntities(ci.getMetadata());
+    }
+
+    /**
+     * Dereferences the test entities with a dereferencer that provides a
+     * thread pool backed {@link ExecutorService}.
+     * @throws Exception
+     */
+    @Test
+    public void testAsyncDereferencing() throws Exception {
+        ContentItem ci = getContentItem("urn:test:testAsyncDereferencing");
+        EntityDereferenceEngine engine = new EntityDereferenceEngine("async", asyncDereferencer);
+        Assert.assertNotEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
+        engine.computeEnhancements(ci);
+        validateDereferencedEntities(ci.getMetadata());
+    }
+
+    /**
+     * Asserts that the passed metadata contain all triples of the test
+     * entities referenced by the metadata and no triple of any other test
+     * entity.
+     * @param metadata the metadata of the processed content item
+     */
+    private void validateDereferencedEntities(TripleCollection metadata) {
+        Iterator<Triple> referenced = metadata.filter(null, ENHANCER_ENTITY_REFERENCE, null);
+        //collect the data of all referenced entities
+        MGraph expected = new IndexedMGraph();
+        while(referenced.hasNext()){
+            UriRef entity = (UriRef)referenced.next().getObject();
+            Iterator<Triple> entityTriples = testData.filter(entity, null, null);
+            while(entityTriples.hasNext()){
+                expected.add(entityTriples.next());
+            }
+        }
+        //everything else in the test data MUST NOT be dereferenced
+        MGraph notExpected = new IndexedMGraph(testData);
+        notExpected.removeAll(expected);
+        Assert.assertTrue("Data of referenced entities are missing in the metadata",
+            metadata.containsAll(expected));
+        Assert.assertTrue("Metadata contain data of entities that are not referenced",
+            Collections.disjoint(metadata, notExpected));
+    }
+
+    /**
+     * {@link EntityDereferencer} that serves the entities of the test data.
+     * The {@link ExecutorService} passed to the constructor (may be
+     * <code>null</code>) is the one returned by {@link #getExecutor()}.
+     */
+    private static class TestDereferencer implements EntityDereferencer {
+
+        private final ExecutorService executorService;
+
+        public TestDereferencer(ExecutorService executorService) {
+            this.executorService = executorService;
+        }
+
+        @Override
+        public boolean supportsOfflineMode() {
+            return true;
+        }
+
+        @Override
+        public ExecutorService getExecutor() {
+            return executorService;
+        }
+
+        /**
+         * Copies all triples of the test data having the passed entity as
+         * subject to the passed graph while holding the passed write lock.
+         * @return <code>true</code> if the test data contain triples for the
+         * entity. Otherwise <code>false</code> (the graph is not changed and
+         * the lock is not acquired in that case)
+         */
+        @Override
+        public boolean dereference(UriRef entity, MGraph graph, boolean offlineMode, Lock writeLock) throws DereferenceException {
+            Iterator<Triple> entityTriples = testData.filter(entity, null, null);
+            if(!entityTriples.hasNext()){
+                return false; //unknown entity
+            }
+            writeLock.lock();
+            try {
+                while(entityTriples.hasNext()){
+                    graph.add(entityTriples.next());
+                }
+            } finally {
+                writeLock.unlock();
+            }
+            return true;
+        }
+
+    }
+
+}

Modified: stanbol/trunk/enhancement-engines/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/pom.xml?rev=1545240&r1=1545239&r2=1545240&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/pom.xml (original)
+++ stanbol/trunk/enhancement-engines/pom.xml Mon Nov 25 12:09:15 2013
@@ -102,7 +102,10 @@
     <!-- converts TextAnnotations to the STANBOL-987 model -->
     <module>textannotationnewmodel</module> 
     <!-- finds co-mentions of Entities earlier mentioned on the Text (STANBOL-1070)  -->
-    <module>entitycomention</module> 
+    <module>entitycomention</module>
+    
+    <!-- Entity Dereference (STANBOL-336) -->
+    <module>dereference/core</module>
     
     <!-- Enhancement Engines using external services -->
     <module>celi</module> <!-- http://linguagrid.org -->